Update.
[glibc.git] / time / strptime.c
blob26f77ebbdde144aa9291e45988f2f92e5bb5db9b
1 /* Convert a string representation of time to a time value.
2 Copyright (C) 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Library General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public
17 License along with the GNU C Library; see the file COPYING.LIB. If not,
18 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
/* XXX This version of the implementation is not really complete.
   Some of the fields cannot provide date information on their own.
   But when several of them appear in the same format (such as year,
   week and weekday), together they are enough to determine the date.  */
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
30 #include <ctype.h>
31 #include <langinfo.h>
32 #include <limits.h>
33 #include <string.h>
34 #include <time.h>
36 #ifdef _LIBC
37 # include "../locale/localeinfo.h"
38 #endif
/* Provide __P when the system headers do not: it keeps the argument
   list of a prototype for compilers that understand prototypes and
   replaces it by an empty list otherwise.  */
#ifndef __P
# if !defined (__GNUC__) && !(defined (__STDC__) && __STDC__)
#  define __P(args) ()
# else
#  define __P(args) args
# endif /* Not GCC and not standard C.  */
#endif /* Not __P.  */
#if ! HAVE_LOCALTIME_R && ! defined localtime_r
# ifdef _LIBC
#  define localtime_r __localtime_r
# else
/* Stand-in for a missing localtime_r: call localtime and copy its
   result into the caller supplied buffer TP.  Returns TP, or a null
   pointer if localtime fails.  This is only an approximation of the
   real function.  */
#  define localtime_r my_localtime_r
static struct tm *
localtime_r (const time_t *t, struct tm *tp)
{
  struct tm *converted = localtime (t);

  if (converted != 0)
    {
      *tp = *converted;
      return tp;
    }
  return 0;
}
# endif /* ! _LIBC */
#endif /* ! HAVE_LOCALTIME_R && ! defined (localtime_r) */
/* Make the enclosing function return NULL unless CH1 equals CH2.
   The do/while wrapper turns the expansion into a single statement, so
   the macro is safe in an unbraced if/else, and the arguments are
   parenthesized and evaluated exactly once.  */
#define match_char(ch1, ch2) \
  do                                                                          \
    {                                                                         \
      if ((ch1) != (ch2))                                                     \
        return NULL;                                                          \
    }                                                                         \
  while (0)

/* Compare the string CS1, ignoring case, with the beginning of S2.
   On a match advance the pointer S2 past the matched text and yield 1;
   otherwise leave S2 alone and yield 0.  S2 must be a modifiable
   pointer lvalue.  */
#if defined __GNUC__ && __GNUC__ >= 2
/* The temporaries carry a trailing underscore so they cannot shadow a
   variable named in the macro arguments.  */
# define match_string(cs1, s2) \
  ({ size_t match_len_ = strlen (cs1);                                        \
     int matched_ = strncasecmp ((cs1), (s2), match_len_) == 0;               \
     if (matched_) (s2) += match_len_;                                        \
     matched_; })
#else
/* Without statement expressions CS1 is evaluated more than once.  */
# define match_string(cs1, s2) \
  (strncasecmp ((cs1), (s2), strlen (cs1)) ? 0 : ((s2) += strlen (cs1), 1))
#endif
/* Read an unsigned decimal number from RP into VAL, skipping leading
   blanks first.  Further digits are consumed only while VAL * 10 does
   not exceed TO.  The enclosing function returns NULL if no digit is
   present or if the result lies outside FROM..TO.
   Plain character comparisons are used on purpose: isdigit() would
   lead to problems with the wide character version.  */
#define get_number(from, to) \
  do {                                                                        \
    for (val = 0; *rp == ' '; ++rp)                                           \
      continue;                                                               \
    if (!(*rp >= '0' && *rp <= '9'))                                          \
      return NULL;                                                            \
    for (;;)                                                                  \
      {                                                                       \
        val = val * 10 + (*rp++ - '0');                                       \
        if (!(val * 10 <= (to)) || *rp < '0' || *rp > '9')                    \
          break;                                                              \
      }                                                                       \
    if (val < (from) || val > (to))                                           \
      return NULL;                                                            \
  } while (0)
#ifdef _NL_CURRENT
/* Like get_number, but unless the raw representation has already been
   chosen, first try the locale's alternate digits.  The ALT_DIGITS
   item is walked as a list of NUL-terminated strings ended by an empty
   string; the position of a string in the list is the value it stands
   for.  The first string that is a case-insensitive prefix of the
   input wins.
   On a match the value must lie in FROM..TO (otherwise the enclosing
   function returns NULL), the matched text is consumed and the locale
   representation is selected.  Without a match the enclosing function
   returns NULL if the locale was already selected and alternate digits
   exist; otherwise ordinary digits are parsed with get_number.  */
# define get_alt_number(from, to) \
  do {                                                                        \
    if (*decided != raw)                                                      \
      {                                                                       \
        const char *alts = _NL_CURRENT (LC_TIME, ALT_DIGITS);                 \
        size_t alt_len = 0;                                                   \
        val = 0;                                                              \
        while (*alts != '\0')                                                 \
          {                                                                   \
            alt_len = strlen (alts);                                          \
            if (strncasecmp (alts, rp, alt_len) == 0)                         \
              break;                                                          \
            alts += alt_len + 1;                                              \
            ++val;                                                            \
          }                                                                   \
        if (*alts == '\0')                                                    \
          {                                                                   \
            if (*decided == loc && val != 0)                                  \
              return NULL;                                                    \
          }                                                                   \
        else                                                                  \
          {                                                                   \
            /* Reject out-of-range values and step over the digits.  */       \
            if (val < (from) || val > (to))                                   \
              return NULL;                                                    \
            rp += alt_len;                                                    \
            *decided = loc;                                                   \
            break;                                                            \
          }                                                                   \
      }                                                                       \
    get_number (from, to);                                                    \
  } while (0)
#else
# define get_alt_number(from, to) \
  /* We don't have the alternate representation.  */                          \
  get_number(from, to)
#endif
/* Parse the input at RP with the nested format NEW_FMT and store the
   resulting position back into RP.  The value is nonzero only if
   NEW_FMT is not empty and the nested parse succeeded; after a failed
   nested parse RP is NULL.  */
#define recursive(new_fmt) \
  ((new_fmt)[0] != '\0'                                                       \
   && NULL != (rp = strptime_internal (rp, (new_fmt), tm, decided)))
/* Names and formats of the C locale ("raw" data).  Inside the GNU libc
   they are taken from the C locale's LC_TIME data; otherwise they are
   provided by the tables and strings below.  */
#ifdef _LIBC
/* This is defined in locale/C-time.c in the GNU libc.  */
extern const struct locale_data _nl_C_LC_TIME;
extern const unsigned short int __mon_yday[2][13];

# define weekday_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (DAY_1)].string)
# define ab_weekday_name \
  (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABDAY_1)].string)
# define month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (MON_1)].string)
# define ab_month_name (&_nl_C_LC_TIME.values[_NL_ITEM_INDEX (ABMON_1)].string)
# define HERE_D_T_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_T_FMT)].string)
# define HERE_D_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (D_FMT)].string)
# define HERE_AM_STR (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (AM_STR)].string)
# define HERE_PM_STR (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (PM_STR)].string)
# define HERE_T_FMT_AMPM \
  (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (T_FMT_AMPM)].string)
# define HERE_T_FMT (_nl_C_LC_TIME.values[_NL_ITEM_INDEX (T_FMT)].string)

# define strncasecmp(s1, s2, n) __strncasecmp (s1, s2, n)
#else
static char const weekday_name[][10] =
  {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  };
static char const ab_weekday_name[][4] =
  {
    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
  };
static char const month_name[][10] =
  {
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December"
  };
static char const ab_month_name[][4] =
  {
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  };
# define HERE_D_T_FMT "%a %b %e %H:%M:%S %Y"
# define HERE_D_FMT "%m/%d/%y"
# define HERE_AM_STR "AM"
# define HERE_PM_STR "PM"
# define HERE_T_FMT_AMPM "%I:%M:%S %p"
# define HERE_T_FMT "%H:%M:%S"

/* Number of days before the start of each month; the last entry is
   the length of the year.  The table needs two rows because it is
   indexed with the result of __isleap: row 0 is for normal years,
   row 1 for leap years.  */
const unsigned short int __mon_yday[2][13] =
  {
    /* Normal years.  */
    { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
    /* Leap years.  */
    { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
  };
#endif
/* Status of lookup: records whether parsing uses the locale data or
   the raw (built-in) data, or has not settled on either yet.  */
enum locale_status
{
  not = 0,      /* Nothing decided so far.  */
  loc = 1,      /* The locale data is used.  */
  raw = 2       /* The raw data is used.  */
};
#ifndef __isleap
/* Yield 1 if YEAR is a leap year and 0 otherwise: years divisible by
   four are leap years, except that century years only count when they
   are divisible by 400.  YEAR is evaluated more than once.  */
# define __isleap(year) \
  (!((year) % 4) && ((year) % 100 || !((year) % 400)))
#endif
204 /* Compute the day of the week. */
205 static void
206 day_of_the_week (struct tm *tm)
208 /* We know that January 1st 1970 was a Thursday (= 4). Compute the
209 the difference between this data in the one on TM and so determine
210 the weekday. */
211 int corr_year = 1900 + tm->tm_year - (tm->tm_mon < 2);
212 int wday = (-473
213 + (365 * (tm->tm_year - 70))
214 + (corr_year / 4)
215 - ((corr_year / 4) / 25) + ((corr_year / 4) % 25 < 0)
216 + (((corr_year / 4) / 25) / 4)
217 + __mon_yday[0][tm->tm_mon]
218 + tm->tm_mday - 1);
219 tm->tm_wday = wday % 7;
222 /* Compute the day of the year. */
223 static void
224 day_of_the_year (struct tm *tm)
226 tm->tm_yday = (__mon_yday[__isleap (1900 + tm->tm_year)][tm->tm_mon]
227 + (tm->tm_mday - 1));
230 static char *
231 #ifdef _LIBC
232 internal_function
233 #endif
234 strptime_internal __P ((const char *buf, const char *format, struct tm *tm,
235 enum locale_status *decided));
237 static char *
238 #ifdef _LIBC
239 internal_function
240 #endif
241 strptime_internal (buf, format, tm, decided)
242 const char *buf;
243 const char *format;
244 struct tm *tm;
245 enum locale_status *decided;
247 const char *rp;
248 const char *fmt;
249 int cnt;
250 size_t val;
251 int have_I, is_pm;
252 int century, want_century;
253 int have_wday, want_xday;
254 int have_yday;
256 rp = buf;
257 fmt = format;
258 have_I = is_pm = 0;
259 century = -1;
260 want_century = 0;
261 have_wday = want_xday = have_yday = 0;
263 while (*fmt != '\0')
265 /* A white space in the format string matches 0 more or white
266 space in the input string. */
267 if (isspace (*fmt))
269 while (isspace (*rp))
270 ++rp;
271 ++fmt;
272 continue;
275 /* Any character but `%' must be matched by the same character
276 in the iput string. */
277 if (*fmt != '%')
279 match_char (*fmt++, *rp++);
280 continue;
283 ++fmt;
284 #ifndef _NL_CURRENT
285 /* We need this for handling the `E' modifier. */
286 start_over:
287 #endif
288 switch (*fmt++)
290 case '%':
291 /* Match the `%' character itself. */
292 match_char ('%', *rp++);
293 break;
294 case 'a':
295 case 'A':
296 /* Match day of week. */
297 for (cnt = 0; cnt < 7; ++cnt)
299 #ifdef _NL_CURRENT
300 if (*decided !=raw)
302 if (match_string (_NL_CURRENT (LC_TIME, DAY_1 + cnt), rp))
304 if (*decided == not
305 && strcmp (_NL_CURRENT (LC_TIME, DAY_1 + cnt),
306 weekday_name[cnt]))
307 *decided = loc;
308 break;
310 if (match_string (_NL_CURRENT (LC_TIME, ABDAY_1 + cnt), rp))
312 if (*decided == not
313 && strcmp (_NL_CURRENT (LC_TIME, ABDAY_1 + cnt),
314 ab_weekday_name[cnt]))
315 *decided = loc;
316 break;
319 #endif
320 if (*decided != loc
321 && (match_string (weekday_name[cnt], rp)
322 || match_string (ab_weekday_name[cnt], rp)))
324 *decided = raw;
325 break;
328 if (cnt == 7)
329 /* Does not match a weekday name. */
330 return NULL;
331 tm->tm_wday = cnt;
332 have_wday = 1;
333 break;
334 case 'b':
335 case 'B':
336 case 'h':
337 /* Match month name. */
338 for (cnt = 0; cnt < 12; ++cnt)
340 #ifdef _NL_CURRENT
341 if (*decided !=raw)
343 if (match_string (_NL_CURRENT (LC_TIME, MON_1 + cnt), rp))
345 if (*decided == not
346 && strcmp (_NL_CURRENT (LC_TIME, MON_1 + cnt),
347 month_name[cnt]))
348 *decided = loc;
349 break;
351 if (match_string (_NL_CURRENT (LC_TIME, ABMON_1 + cnt), rp))
353 if (*decided == not
354 && strcmp (_NL_CURRENT (LC_TIME, ABMON_1 + cnt),
355 ab_month_name[cnt]))
356 *decided = loc;
357 break;
360 #endif
361 if (match_string (month_name[cnt], rp)
362 || match_string (ab_month_name[cnt], rp))
364 *decided = raw;
365 break;
368 if (cnt == 12)
369 /* Does not match a month name. */
370 return NULL;
371 tm->tm_mon = cnt;
372 want_xday = 1;
373 break;
374 case 'c':
375 /* Match locale's date and time format. */
376 #ifdef _NL_CURRENT
377 if (*decided != raw)
379 if (!recursive (_NL_CURRENT (LC_TIME, D_T_FMT)))
381 if (*decided == loc)
382 return NULL;
384 else
386 if (*decided == not &&
387 strcmp (_NL_CURRENT (LC_TIME, D_T_FMT), HERE_D_T_FMT))
388 *decided = loc;
389 want_xday = 1;
390 break;
392 *decided = raw;
394 #endif
395 if (!recursive (HERE_D_T_FMT))
396 return NULL;
397 want_xday = 1;
398 break;
399 case 'C':
400 /* Match century number. */
401 get_number (0, 99);
402 century = val;
403 want_xday = 1;
404 break;
405 case 'd':
406 case 'e':
407 /* Match day of month. */
408 get_number (1, 31);
409 tm->tm_mday = val;
410 want_xday = 1;
411 break;
412 case 'F':
413 if (!recursive ("%Y-%m-%d"))
414 return NULL;
415 want_xday = 1;
416 break;
417 case 'x':
418 #ifdef _NL_CURRENT
419 if (*decided != raw)
421 if (!recursive (_NL_CURRENT (LC_TIME, D_FMT)))
423 if (*decided == loc)
424 return NULL;
426 else
428 if (decided == not
429 && strcmp (_NL_CURRENT (LC_TIME, D_FMT), HERE_D_FMT))
430 *decided = loc;
431 want_xday = 1;
432 break;
434 *decided = raw;
436 #endif
437 /* Fall through. */
438 case 'D':
439 /* Match standard day format. */
440 if (!recursive (HERE_D_FMT))
441 return NULL;
442 want_xday = 1;
443 break;
444 case 'k':
445 case 'H':
446 /* Match hour in 24-hour clock. */
447 get_number (0, 23);
448 tm->tm_hour = val;
449 have_I = 0;
450 break;
451 case 'I':
452 /* Match hour in 12-hour clock. */
453 get_number (1, 12);
454 tm->tm_hour = val % 12;
455 have_I = 1;
456 break;
457 case 'j':
458 /* Match day number of year. */
459 get_number (1, 366);
460 tm->tm_yday = val - 1;
461 have_yday = 1;
462 break;
463 case 'm':
464 /* Match number of month. */
465 get_number (1, 12);
466 tm->tm_mon = val - 1;
467 want_xday = 1;
468 break;
469 case 'M':
470 /* Match minute. */
471 get_number (0, 59);
472 tm->tm_min = val;
473 break;
474 case 'n':
475 case 't':
476 /* Match any white space. */
477 while (isspace (*rp))
478 ++rp;
479 break;
480 case 'p':
481 /* Match locale's equivalent of AM/PM. */
482 #ifdef _NL_CURRENT
483 if (*decided != raw)
485 if (match_string (_NL_CURRENT (LC_TIME, AM_STR), rp))
487 if (strcmp (_NL_CURRENT (LC_TIME, AM_STR), HERE_AM_STR))
488 *decided = loc;
489 break;
491 if (match_string (_NL_CURRENT (LC_TIME, PM_STR), rp))
493 if (strcmp (_NL_CURRENT (LC_TIME, PM_STR), HERE_PM_STR))
494 *decided = loc;
495 is_pm = 1;
496 break;
498 *decided = raw;
500 #endif
501 if (!match_string (HERE_AM_STR, rp))
502 if (match_string (HERE_PM_STR, rp))
503 is_pm = 1;
504 else
505 return NULL;
506 break;
507 case 'r':
508 #ifdef _NL_CURRENT
509 if (*decided != raw)
511 if (!recursive (_NL_CURRENT (LC_TIME, T_FMT_AMPM)))
513 if (*decided == loc)
514 return NULL;
516 else
518 if (*decided == not &&
519 strcmp (_NL_CURRENT (LC_TIME, T_FMT_AMPM),
520 HERE_T_FMT_AMPM))
521 *decided = loc;
522 break;
524 *decided = raw;
526 #endif
527 if (!recursive (HERE_T_FMT_AMPM))
528 return NULL;
529 break;
530 case 'R':
531 if (!recursive ("%H:%M"))
532 return NULL;
533 break;
534 case 's':
536 /* The number of seconds may be very high so we cannot use
537 the `get_number' macro. Instead read the number
538 character for character and construct the result while
539 doing this. */
540 time_t secs = 0;
541 if (*rp < '0' || *rp > '9')
542 /* We need at least one digit. */
543 return NULL;
547 secs *= 10;
548 secs += *rp++ - '0';
550 while (*rp >= '0' && *rp <= '9');
552 if (localtime_r (&secs, tm) == NULL)
553 /* Error in function. */
554 return NULL;
556 break;
557 case 'S':
558 get_number (0, 61);
559 tm->tm_sec = val;
560 break;
561 case 'X':
562 #ifdef _NL_CURRENT
563 if (*decided != raw)
565 if (!recursive (_NL_CURRENT (LC_TIME, T_FMT)))
567 if (*decided == loc)
568 return NULL;
570 else
572 if (strcmp (_NL_CURRENT (LC_TIME, T_FMT), HERE_T_FMT))
573 *decided = loc;
574 break;
576 *decided = raw;
578 #endif
579 /* Fall through. */
580 case 'T':
581 if (!recursive (HERE_T_FMT))
582 return NULL;
583 break;
584 case 'u':
585 get_number (1, 7);
586 tm->tm_wday = val % 7;
587 have_wday = 1;
588 break;
589 case 'g':
590 get_number (0, 99);
591 /* XXX This cannot determine any field in TM. */
592 break;
593 case 'G':
594 if (*rp < '0' || *rp > '9')
595 return NULL;
596 /* XXX Ignore the number since we would need some more
597 information to compute a real date. */
599 ++rp;
600 while (*rp >= '0' && *rp <= '9');
601 break;
602 case 'U':
603 case 'V':
604 case 'W':
605 get_number (0, 53);
606 /* XXX This cannot determine any field in TM without some
607 information. */
608 break;
609 case 'w':
610 /* Match number of weekday. */
611 get_number (0, 6);
612 tm->tm_wday = val;
613 have_wday = 1;
614 break;
615 case 'y':
616 /* Match year within century. */
617 get_number (0, 99);
618 /* The "Year 2000: The Millennium Rollover" paper suggests that
619 values in the range 69-99 refer to the twentieth century. */
620 tm->tm_year = val >= 69 ? val : val + 100;
621 /* Indicate that we want to use the century, if specified. */
622 want_century = 1;
623 want_xday = 1;
624 break;
625 case 'Y':
626 /* Match year including century number. */
627 get_number (0, 9999);
628 tm->tm_year = val - 1900;
629 want_century = 0;
630 want_xday = 1;
631 break;
632 case 'Z':
633 /* XXX How to handle this? */
634 break;
635 case 'E':
636 #ifdef _NL_CURRENT
637 switch (*fmt++)
639 case 'c':
640 /* Match locale's alternate date and time format. */
641 if (*decided != raw)
643 const char *fmt = _NL_CURRENT (LC_TIME, ERA_D_T_FMT);
645 if (*fmt == '\0')
646 fmt = _NL_CURRENT (LC_TIME, D_T_FMT);
648 if (!recursive (fmt))
650 if (*decided == loc)
651 return NULL;
653 else
655 if (strcmp (fmt, HERE_D_T_FMT))
656 *decided = loc;
657 want_xday = 1;
658 break;
660 *decided = raw;
662 /* The C locale has no era information, so use the
663 normal representation. */
664 if (!recursive (HERE_D_T_FMT))
665 return NULL;
666 want_xday = 1;
667 break;
668 case 'C':
669 case 'y':
670 case 'Y':
671 /* Match name of base year in locale's alternate
672 representation. */
673 /* XXX This is currently not implemented. It should
674 use the value _NL_CURRENT (LC_TIME, ERA). */
675 break;
676 case 'x':
677 if (*decided != raw)
679 const char *fmt = _NL_CURRENT (LC_TIME, ERA_D_FMT);
681 if (*fmt == '\0')
682 fmt = _NL_CURRENT (LC_TIME, D_FMT);
684 if (!recursive (fmt))
686 if (*decided == loc)
687 return NULL;
689 else
691 if (strcmp (fmt, HERE_D_FMT))
692 *decided = loc;
693 break;
695 *decided = raw;
697 if (!recursive (HERE_D_FMT))
698 return NULL;
699 break;
700 case 'X':
701 if (*decided != raw)
703 const char *fmt = _NL_CURRENT (LC_TIME, ERA_T_FMT);
705 if (*fmt == '\0')
706 fmt = _NL_CURRENT (LC_TIME, T_FMT);
708 if (!recursive (fmt))
710 if (*decided == loc)
711 return NULL;
713 else
715 if (strcmp (fmt, HERE_T_FMT))
716 *decided = loc;
717 break;
719 *decided = raw;
721 if (!recursive (HERE_T_FMT))
722 return NULL;
723 break;
724 default:
725 return NULL;
727 break;
728 #else
729 /* We have no information about the era format. Just use
730 the normal format. */
731 if (*fmt != 'c' && *fmt != 'C' && *fmt != 'y' && *fmt != 'Y'
732 && *fmt != 'x' && *fmt != 'X')
733 /* This is an illegal format. */
734 return NULL;
736 goto start_over;
737 #endif
738 case 'O':
739 switch (*fmt++)
741 case 'd':
742 case 'e':
743 /* Match day of month using alternate numeric symbols. */
744 get_alt_number (1, 31);
745 tm->tm_mday = val;
746 want_xday = 1;
747 break;
748 case 'H':
749 /* Match hour in 24-hour clock using alternate numeric
750 symbols. */
751 get_alt_number (0, 23);
752 tm->tm_hour = val;
753 have_I = 0;
754 break;
755 case 'I':
756 /* Match hour in 12-hour clock using alternate numeric
757 symbols. */
758 get_alt_number (1, 12);
759 tm->tm_hour = val - 1;
760 have_I = 1;
761 break;
762 case 'm':
763 /* Match month using alternate numeric symbols. */
764 get_alt_number (1, 12);
765 tm->tm_mon = val - 1;
766 want_xday = 1;
767 break;
768 case 'M':
769 /* Match minutes using alternate numeric symbols. */
770 get_alt_number (0, 59);
771 tm->tm_min = val;
772 break;
773 case 'S':
774 /* Match seconds using alternate numeric symbols. */
775 get_alt_number (0, 61);
776 tm->tm_sec = val;
777 break;
778 case 'U':
779 case 'V':
780 case 'W':
781 get_alt_number (0, 53);
782 /* XXX This cannot determine any field in TM without
783 further information. */
784 break;
785 case 'w':
786 /* Match number of weekday using alternate numeric symbols. */
787 get_alt_number (0, 6);
788 tm->tm_wday = val;
789 have_wday = 1;
790 break;
791 case 'y':
792 /* Match year within century using alternate numeric symbols. */
793 get_alt_number (0, 99);
794 tm->tm_year = val >= 69 ? val : val + 100;
795 want_xday = 1;
796 break;
797 default:
798 return NULL;
800 break;
801 default:
802 return NULL;
806 if (have_I && is_pm)
807 tm->tm_hour += 12;
809 if (want_century && century != -1)
810 tm->tm_year = tm->tm_year % 100 + (century - 19) * 100;
812 if (want_xday && !have_wday)
813 day_of_the_week (tm);
814 if (want_xday && !have_yday)
815 day_of_the_year (tm);
817 return (char *) rp;
821 char *
822 strptime (buf, format, tm)
823 const char *buf;
824 const char *format;
825 struct tm *tm;
827 enum locale_status decided;
828 #ifdef _NL_CURRENT
829 decided = not;
830 #else
831 decided = raw;
832 #endif
833 return strptime_internal (buf, format, tm, &decided);