Update Spanish translation
[gnumeric.git] / src / number-match.c
blobf0cf15b199ef12c30242bbb386fa6a1813b2a829
/*
 * number-match.c: This file includes the support for matching
 * entered strings as numbers (by trying to apply one of the existing
 * cell formats).
 *
 * The idea is simple: we create a regular expression from the format
 * string that would match a value entered in that format.  Then, on
 * lookup we try to match the string against every regular expression
 * we have: if a match is found, then we decode the number using a
 * precomputed parallel-list of subexpressions.
 *
 * Authors:
 *   Morten Welinder (terra@gnome.org)
 *   Miguel de Icaza (miguel@gnu.org)
 */
16 #include <gnumeric-config.h>
17 #include <glib/gi18n-lib.h>
18 #include <gnumeric.h>
19 #include <number-match.h>
21 #include <gutils.h>
22 #include <style.h>
23 #include <gnm-format.h>
24 #include <value.h>
25 #include <mathfunc.h>
26 #include <numbers.h>
27 #include <gnm-datetime.h>
28 #include <goffice/goffice.h>
30 #include <string.h>
31 #include <stdlib.h>
32 #include <errno.h>
33 #include <locale.h>
34 #include <math.h>
35 #include <time.h>
36 #undef DEBUG_NUMBER_MATCH
39 * value_is_error: Check to see if a string begins with one of the magic
40 * error strings.
42 * @str: The string to test
44 * Returns: an error if there is one, or %NULL.
46 static GnmValue *
47 value_is_error (char const *str)
49 GnmStdError e;
51 if (str[0] != '#')
52 return NULL;
54 for (e = (GnmStdError)0; e < GNM_ERROR_UNKNOWN; e++)
55 if (0 == strcmp (str, value_error_name (e, TRUE)))
56 return value_new_error_std (NULL, e);
58 return NULL;
61 /**
62 * format_match_simple:
63 * @text: A String to match against.
65 * Attempt to match the supplied string as a simple value.
67 * WARNING WARNING WARNING : This routine should NEVER be changed to match
68 * VALUE_STRING that will break the parsers
69 * handling of named expressions.
71 GnmValue *
72 format_match_simple (char const *text)
74 /* Is it a boolean? */
75 if (0 == g_ascii_strcasecmp (text, go_locale_boolean_name (TRUE)))
76 return value_new_bool (TRUE);
77 if (0 == g_ascii_strcasecmp (text, go_locale_boolean_name (FALSE)))
78 return value_new_bool (FALSE);
80 /* Is it an error? */
82 GnmValue *err = value_is_error (text);
83 if (err != NULL)
84 return err;
87 /* Is it a floating-point number */
89 char *end;
90 gnm_float d;
92 d = gnm_utf8_strto (text, &end);
93 if (text != end && errno != ERANGE && gnm_finite (d)) {
94 /* Allow and ignore spaces at the end. */
95 while (g_ascii_isspace (*end))
96 end++;
97 if (*end == '\0')
98 return value_new_float (d);
102 return NULL;
105 static struct {
106 char *lc_time;
107 GORegexp re_MMMMddyyyy;
108 GORegexp re_ddMMMMyyyy;
109 GORegexp re_yyyymmdd1;
110 GORegexp re_yyyymmdd2;
111 GORegexp re_mmddyyyy;
112 GORegexp re_mmdd;
113 GORegexp re_hhmmss1;
114 GORegexp re_hhmmss2;
115 GORegexp re_hhmmssds;
116 GORegexp re_hhmmss_ampm;
117 } datetime_locale;
120 static void
121 datetime_locale_clear (void)
123 g_free (datetime_locale.lc_time);
124 go_regfree (&datetime_locale.re_MMMMddyyyy);
125 go_regfree (&datetime_locale.re_ddMMMMyyyy);
126 go_regfree (&datetime_locale.re_yyyymmdd1);
127 go_regfree (&datetime_locale.re_yyyymmdd2);
128 go_regfree (&datetime_locale.re_mmddyyyy);
129 go_regfree (&datetime_locale.re_mmdd);
130 go_regfree (&datetime_locale.re_hhmmss1);
131 go_regfree (&datetime_locale.re_hhmmss2);
132 go_regfree (&datetime_locale.re_hhmmssds);
133 go_regfree (&datetime_locale.re_hhmmss_ampm);
134 memset (&datetime_locale, 0, sizeof (datetime_locale));
137 static char const *
138 my_regerror (int err, GORegexp const *preg)
140 static char buffer[1024];
141 go_regerror (err, preg, buffer, sizeof (buffer));
142 return buffer;
145 static void
146 datetime_locale_setup1 (GORegexp *rx, char const *pat)
148 int ret = go_regcomp (rx, pat, GO_REG_ICASE);
149 if (ret) {
150 g_warning ("Failed to compile rx \"%s\": %s\n",
151 pat,
152 my_regerror (ret, rx));
157 static void
158 datetime_locale_setup (char const *lc_time)
160 GString *p_MMMM = g_string_sized_new (200);
161 GString *p_MMM = g_string_sized_new (200);
162 GString *p_decimal = g_string_sized_new (10);
163 char *s;
164 int m;
166 datetime_locale.lc_time = g_strdup (lc_time);
168 for (m = 1; m <= 12; m++) {
169 if (m != 1)
170 g_string_append_c (p_MMMM, '|');
171 g_string_append_c (p_MMMM, '(');
172 s = go_date_month_name (m, FALSE);
173 go_regexp_quote (p_MMMM, s);
174 g_free (s);
175 g_string_append_c (p_MMMM, ')');
177 if (m != 1)
178 g_string_append_c (p_MMM, '|');
179 g_string_append_c (p_MMM, '(');
180 s = go_date_month_name (m, TRUE);
181 go_regexp_quote (p_MMM, s);
182 /* nb_NO actually adds a "." for these abbreviations. */
183 if (g_unichar_ispunct (g_utf8_get_char (g_utf8_prev_char (p_MMM->str + p_MMM->len))))
184 g_string_append_c (p_MMM, '?');
185 g_free (s);
186 g_string_append_c (p_MMM, ')');
189 go_regexp_quote (p_decimal, go_locale_get_decimal ()->str);
192 * "Dec 1, 2000"
193 * "Dec/1/04"
194 * "December 1, 2000"
195 * "December 1 2000"
196 * "Dec-1-2000"
197 * "Dec 1"
198 * "Dec/1"
199 * "December 1"
200 * "Dec-1"
201 * "Jan 2010"
202 * "January 2010"
204 s = g_strconcat ("^(",
205 p_MMMM->str,
206 "|",
207 p_MMM->str,
208 ")(-|/|\\s)(\\d+)((,?\\s+|-|/)(\\d+))?\\b",
209 NULL);
210 datetime_locale_setup1 (&datetime_locale.re_MMMMddyyyy, s);
211 g_free (s);
214 * "1-Dec-2000"
215 * "1/Dec/04"
216 * "1-December-2000"
217 * "1. december 2000"
218 * "1. december, 2000"
219 * "1-Dec"
220 * "1/Dec"
221 * "1-December"
222 * "1. december"
224 s = g_strconcat ("^(\\d+)(-|/|\\.?\\s*)(",
225 p_MMMM->str,
226 "|",
227 p_MMM->str,
228 ")((,?\\s*|-|/)(\\d+))?\\b",
229 NULL);
230 datetime_locale_setup1 (&datetime_locale.re_ddMMMMyyyy, s);
231 g_free (s);
234 * "20001231"
235 * (with special support for 20001231:123456)
237 s = g_strconcat ("^(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)(:\\d\\d\\d\\d\\d\\d(",
238 p_decimal->str,
239 "\\d*)?)?\\s*$",
240 NULL);
241 datetime_locale_setup1 (&datetime_locale.re_yyyymmdd1, s);
242 g_free (s);
245 * "1900/01/01"
246 * "1900-1-1"
248 datetime_locale_setup1 (&datetime_locale.re_yyyymmdd2,
249 "^(\\d\\d\\d\\d)[-/.](\\d+)[-/.](\\d+)\\b");
252 * "01/31/2001" [Jan 31] if month_before_day
253 * "1/2/88" [Jan 2] if month_before_day
254 * "1/2/88" [Feb 1] if !month_before_day
255 * "31/1/2001" [Jan 31] if !month_before_day
257 datetime_locale_setup1 (&datetime_locale.re_mmddyyyy,
258 "^(\\d+)[-/.](\\d+)[-/.](\\d+)\\b");
261 * "2005/2" [Feb 1]
262 * "2/2005" [Feb 1]
263 * "01/31" [Jan 31] if month_before_day
264 * "31/1" [Jan 31] if !month_before_day
266 datetime_locale_setup1 (&datetime_locale.re_mmdd,
267 "^(\\d+)([-/.])(\\d+)\\b");
270 * "15:30:00.3"
271 * "30:00.3" [A little more than 30min]
272 * "115:30:00.3"
274 /* ^(((\d+):)?(\d+):)?(\d+.\d*)\s*$ */
275 s = g_strconcat ("^(((\\d+):)?(\\d+):)?(\\d+",
276 p_decimal->str,
277 ".\\d*)\\s*$",
278 NULL);
279 datetime_locale_setup1 (&datetime_locale.re_hhmmssds, s);
280 g_free (s);
283 * "15:30:00"
284 * "15:30" [15:30:00] if prefer_hour
285 * "15:30" [00:15:30] if !prefer_hour
287 datetime_locale_setup1 (&datetime_locale.re_hhmmss1,
288 "^(\\d+):(\\d+)(:(\\d+))?\\s*$");
291 * "153000"
292 * "153000.2"
294 s = g_strconcat ("^(\\d\\d)(\\d\\d)(\\d\\d)?(",
295 p_decimal->str,
296 "\\d*)?\\s*$",
297 NULL);
298 datetime_locale_setup1 (&datetime_locale.re_hhmmss2, s);
299 g_free (s);
302 * "12:30:01.3 am"
303 * "12:30:01 am"
304 * "12:30 am"
305 * "12am"
307 s = g_strconcat ("^(\\d+)(:(\\d+)(:(\\d+(",
308 p_decimal->str,
309 "\\d*)?))?)?\\s*((am)|(pm))\\s*$",
310 NULL);
311 datetime_locale_setup1 (&datetime_locale.re_hhmmss_ampm, s);
312 g_free (s);
314 g_string_free (p_MMMM, TRUE);
315 g_string_free (p_MMM, TRUE);
316 g_string_free (p_decimal, TRUE);
319 static int
320 find_month (GORegmatch const *pm)
322 int m;
324 for (m = 1; m <= 12; m++) {
325 if (pm->rm_so != pm->rm_eo)
326 return m;
327 pm++;
330 return -1;
333 static int
334 handle_int (char const *text, GORegmatch const *pm, int min, int max, int maxlen)
336 int i = 0;
337 char const *p = text + pm->rm_so;
338 char const *end = text + pm->rm_eo;
339 int len = 0;
341 while (p != end) {
342 gunichar uc = g_utf8_get_char (p);
343 p = g_utf8_next_char (p);
344 i = (10 * i) + g_unichar_digit_value (uc);
345 len++;
347 if (i > max || len > maxlen)
348 return -1;
351 if (i >= min)
352 return i;
353 else
354 return -1;
357 static int
358 handle_day (char const *text, GORegmatch const *pm)
360 return handle_int (text, pm, 1, 31, 2);
363 static int
364 handle_month (char const *text, GORegmatch const *pm)
366 return handle_int (text, pm, 1, 12, 2);
369 static int
370 current_year (void)
372 time_t now = time (NULL);
373 struct tm *tm = localtime (&now);
374 return 1900 + tm->tm_year;
377 static int
378 handle_year (char const *text, GORegmatch const *pm)
380 int y;
382 if (pm->rm_so == pm->rm_eo)
383 return current_year ();
385 y = handle_int (text, pm, 0, 9999, 4);
387 if (y < 0)
388 return -1;
389 else if (y <= 29)
390 return 2000 + y;
391 else if (y <= 99)
392 return 1900 + y;
393 else if (y < (gnm_datetime_allow_negative () ? 1582 : 1900))
394 return -1;
395 else
396 return y;
400 static gnm_float
401 handle_float (char const *text, GORegmatch const *pm)
403 gnm_float val = 0;
404 char const *p;
405 char const *end;
406 gnm_float num = 10;
408 /* Empty means zero. */
409 if (pm->rm_so == pm->rm_eo)
410 return 0;
412 p = text + pm->rm_so;
413 end = text + pm->rm_eo;
414 while (p != end) {
415 gunichar uc = g_utf8_get_char (p);
416 int d = g_unichar_digit_value (uc);
417 p = g_utf8_next_char (p);
418 if (d < 0) break; /* Must be decimal sep. */
419 val = (10 * val) + d;
422 while (p != end) {
423 gunichar uc = g_utf8_get_char (p);
424 int d = g_unichar_digit_value (uc);
425 p = g_utf8_next_char (p);
426 val += d / num;
427 num *= 10;
430 return val;
433 static void
434 fixup_hour_ampm (gnm_float *hour, const GORegmatch *pm)
436 gboolean is_am = (pm->rm_so != pm->rm_eo);
438 if (*hour < 1 || *hour > 12) {
439 *hour = -1;
440 return;
443 if (*hour == 12)
444 *hour = 0;
445 if (!is_am)
446 *hour += 12;
449 static gboolean
450 valid_hms (gnm_float h, gnm_float m, gnm_float s,
451 gboolean allow_elapsed, char *elapsed)
453 gboolean h_ok = h >= 0 && h < 24;
454 gboolean m_ok = m >= 0 && m < 60;
455 gboolean s_ok = s >= 0 && s < 60;
457 /* Boring old clock time. */
458 if (h_ok && m_ok && s_ok) {
459 if (elapsed)
460 *elapsed = 0;
461 return TRUE;
464 if (!allow_elapsed)
465 return FALSE;
467 if (*elapsed == 'h' && m_ok && s_ok)
468 return TRUE;
470 if (*elapsed == 'm' && h == 0 && s_ok)
471 return TRUE;
473 if (*elapsed == 's' && h == 0 && m == 0)
474 return TRUE;
476 return FALSE;
479 #define DO_SIGN(sign,uc,action) \
481 if (uc == '-' || uc == UNICODE_MINUS_SIGN_C) { \
482 sign = '-'; \
483 action; \
484 } else if (uc == '+') { \
485 sign = '+'; \
486 action; \
490 #define SKIP_DIGITS(text) while (g_ascii_isdigit (*(text))) (text)++
492 #define SKIP_SPACES(text) \
493 while (*(text) && g_unichar_isspace (g_utf8_get_char (text))) \
494 (text) = g_utf8_next_char (text)
497 static GnmValue *
498 format_match_time (char const *text, gboolean allow_elapsed,
499 gboolean prefer_hour, gboolean add_format)
501 char sign = 0;
502 gunichar uc;
503 gnm_float hour, minute, second;
504 gnm_float time_val;
505 GORegmatch match[10];
506 char const *time_format = NULL;
507 GnmValue *v;
509 SKIP_SPACES (text);
511 /* AM/PM means hour is needed. No sign allowed. */
512 /* ^(\d+)(:(\d+)(:(\d+(.\d*)?))?)?\s*((am)|(pm))\s*$ */
513 /* 1 2 3 4 5 6 78 9 */
514 if (go_regexec (&datetime_locale.re_hhmmss_ampm, text, G_N_ELEMENTS (match), match, 0) == 0) {
515 hour = handle_float (text, match + 1);
516 fixup_hour_ampm (&hour, match + 8);
517 minute = handle_float (text, match + 3);
518 second = handle_float (text, match + 5);
519 if (valid_hms (hour, minute, second, FALSE, NULL)) {
520 time_format = "h:mm:ss AM/PM";
521 goto got_time;
525 uc = g_utf8_get_char (text);
526 if (allow_elapsed) {
527 DO_SIGN (sign, uc, {
528 text = g_utf8_next_char (text);
532 /* If fractional seconds are present, we know the layout. */
533 /* ^(((\d+):)?(\d+):)?(\d+.\d*)\s*$ */
534 /* 123 4 5 */
535 if (go_regexec (&datetime_locale.re_hhmmssds, text, G_N_ELEMENTS (match), match, 0) == 0) {
536 char elapsed =
537 match[3].rm_so != match[3].rm_eo
538 ? 'h'
539 : (match[4].rm_so != match[4].rm_eo
540 ? 'm'
541 : 's');
543 hour = handle_float (text, match + 3);
544 minute = handle_float (text, match + 4);
545 second = handle_float (text, match + 5);
547 if (valid_hms (hour, minute, second, allow_elapsed, &elapsed)) {
548 time_format = elapsed ? "[h]:mm:ss" : "h:mm:ss";
549 goto got_time;
553 /* ^(\d+):(\d+)(:(\d+))?\s*$ */
554 /* 1 2 3 4 */
555 if (go_regexec (&datetime_locale.re_hhmmss1, text, G_N_ELEMENTS (match), match, 0) == 0) {
556 gboolean has_all = (match[4].rm_so != match[4].rm_eo);
557 char elapsed;
558 const char *time_format_elapsed;
560 if (prefer_hour || has_all) {
561 hour = handle_float (text, match + 1);
562 minute = handle_float (text, match + 2);
563 second = handle_float (text, match + 4);
564 time_format = has_all ? "h:mm:ss" : "h:mm";
565 time_format_elapsed = has_all ? "[h]:mm:ss" : "[h]:mm";
566 elapsed = 'h';
567 } else {
568 hour = 0;
569 minute = handle_float (text, match + 1);
570 second = handle_float (text, match + 2);
571 time_format = "mm:ss";
572 time_format_elapsed = "[m]:ss";
573 elapsed = 'm';
576 if (valid_hms (hour, minute, second, allow_elapsed, &elapsed)) {
577 if (elapsed)
578 time_format = time_format_elapsed;
579 goto got_time;
583 /* ^(\d\d)(\d\d)(\d\d)?(\.\d*)?\s*$ */
584 /* 1 2 3 4 */
585 if (go_regexec (&datetime_locale.re_hhmmss2, text, G_N_ELEMENTS (match), match, 0) == 0) {
586 gboolean has3 = (match[3].rm_so != match[3].rm_eo);
587 gboolean hasfrac = (match[4].rm_so != match[4].rm_eo);
588 char elapsed;
589 const char *time_format_elapsed;
591 if ((prefer_hour && !hasfrac) || has3) {
592 hour = handle_float (text, match + 1);
593 minute = handle_float (text, match + 2);
594 second = handle_float (text, match + 3) + handle_float (text, match + 4);
595 time_format = "h:mm:ss";
596 time_format_elapsed = "[h]:mm:ss";
597 elapsed = 'h';
598 } else {
599 hour = 0;
600 minute = handle_float (text, match + 1);
601 second = handle_float (text, match + 2) + handle_float (text, match + 4);
602 time_format = "mm:ss";
603 time_format_elapsed = "[m]:ss";
604 elapsed = 'm';
607 if (valid_hms (hour, minute, second, allow_elapsed, &elapsed)) {
608 if (elapsed)
609 time_format = time_format_elapsed;
610 goto got_time;
614 return NULL;
616 got_time:
617 time_val = (second + 60 * (minute + 60 * hour)) / (24 * 60 * 60);
618 if (sign == '-')
619 time_val = 0 - time_val;
620 v = value_new_float (time_val);
622 if (add_format) {
623 GOFormat *fmt = go_format_new_from_XL (time_format);
624 value_set_fmt (v, fmt);
625 go_format_unref (fmt);
628 return v;
631 static gboolean
632 valid_dmy (int d, int m, int y)
634 /* Avoid sign-induced problem. d and m are capped. */
635 return y >= 0 && g_date_valid_dmy (d, m, y);
639 GnmValue *
640 format_match_datetime (char const *text,
641 GODateConventions const *date_conv,
642 gboolean month_before_day,
643 gboolean add_format,
644 gboolean presume_date)
646 int day, month, year;
647 GDate date;
648 gnm_float time_val, date_val;
649 char const *lc_time = setlocale (LC_TIME, NULL);
650 GORegmatch match[31];
651 gunichar uc;
652 int dig1;
653 char *date_format = NULL;
654 GnmValue *res = NULL;
655 char *time_format = NULL;
657 if (lc_time != datetime_locale.lc_time &&
658 (lc_time == NULL ||
659 datetime_locale.lc_time == NULL ||
660 strcmp (lc_time, datetime_locale.lc_time))) {
661 datetime_locale_clear ();
662 datetime_locale_setup (lc_time);
665 SKIP_SPACES (text);
666 uc = g_utf8_get_char (text);
667 dig1 = g_unichar_digit_value (uc);
669 /* ^(MMMM)(-|/|\s)(\d+)((,\s+|-|/)(\d+))?\b */
670 /* 1 26 27 28 30 */
671 /* 29 */
672 if (dig1 < 0 &&
673 go_regexec (&datetime_locale.re_MMMMddyyyy, text, G_N_ELEMENTS (match), match, 0) == 0) {
674 month = find_month (&match[2]);
675 if (month == -1) month = find_month (&match[2 + 12]);
676 day = handle_day (text, match + 27);
677 if (day == -1 &&
678 match[27].rm_eo - match[27].rm_so >= 4 &&
679 match[28].rm_so == match[28].rm_eo) {
680 /* Only one number with 4+ digits -- might be a year. */
681 year = handle_year (text, match + 27);
682 day = 1;
683 } else {
684 year = handle_year (text, match + 30);
686 if (valid_dmy (day, month, year)) {
687 date_format = gnm_format_frob_slashes ("mmm/dd/yyyy");
688 text += match[0].rm_eo;
689 goto got_date;
693 /* ^(\d+)(-|/|\.?\s*)(MMMM)((,?\s*|-|/)(\d+))?\b */
694 /* 1 2 3 28 30 */
695 /* 29 */
696 if (dig1 >= 0 &&
697 go_regexec (&datetime_locale.re_ddMMMMyyyy, text, G_N_ELEMENTS (match), match, 0) == 0) {
698 day = handle_day (text, match + 1);
699 month = find_month (&match[4]);
700 if (month == -1) month = find_month (&match[4 + 12]);
701 year = handle_year (text, match + 30);
702 if (valid_dmy (day, month, year)) {
703 date_format = g_strdup ("d-mmm-yyyy");
704 text += match[0].rm_eo;
705 goto got_date;
709 /* ^(\d\d\d\d)(\d\d)(\d\d)(:\d\d\d\d\d\d(\.\d*)?)?\s*$ */
710 /* 1 2 3 4 5 */
711 if (dig1 > 0 && /* Exclude zero. */
712 go_regexec (&datetime_locale.re_yyyymmdd1, text, G_N_ELEMENTS (match), match, 0) == 0) {
713 year = handle_year (text, match + 1);
714 month = handle_month (text, match + 2);
715 day = handle_day (text, match + 3);
716 if (valid_dmy (day, month, year)) {
717 date_format = g_strdup ("yyyy-mmm-dd");
718 text += match[3].rm_eo;
719 if (*text == ':')
720 text++;
721 goto got_date;
725 /* ^(\d\d\d\d)[-/.](\d\d)[-/.](\d\d)\b */
726 /* 1 2 3 */
727 if (dig1 > 0 && /* Exclude zero. */
728 go_regexec (&datetime_locale.re_yyyymmdd2, text, G_N_ELEMENTS (match), match, 0) == 0) {
729 year = handle_year (text, match + 1);
730 month = handle_month (text, match + 2);
731 day = handle_day (text, match + 3);
732 if (valid_dmy (day, month, year)) {
733 date_format = g_strdup ("yyyy-mmm-dd");
734 text += match[0].rm_eo;
735 goto got_date;
739 /* ^(\d+)[-/.](\d+)[-/.](\d+)\b */
740 /* 1 2 3 */
741 if (dig1 >= 0 &&
742 go_regexec (&datetime_locale.re_mmddyyyy, text, G_N_ELEMENTS (match), match, 0) == 0) {
743 if (month_before_day) {
744 month = handle_month (text, match + 1);
745 day = handle_day (text, match + 2);
746 } else {
747 month = handle_month (text, match + 2);
748 day = handle_day (text, match + 1);
750 year = handle_year (text, match + 3);
751 if (valid_dmy (day, month, year)) {
752 date_format = gnm_format_frob_slashes (month_before_day
753 ? "m/d/yyyy"
754 : "d/m/yyyy");
755 text += match[0].rm_eo;
756 goto got_date;
760 /* ^(\d+)([-/.])(\d+)\b */
761 /* 1 2 3 */
762 if (dig1 >= 0 &&
763 go_regexec (&datetime_locale.re_mmdd, text, G_N_ELEMENTS (match), match, 0) == 0) {
765 * Unless we already have a date format, do not accept
766 * 1-10, for example. See bug 376090.
768 gboolean good_ddmmsep =
769 presume_date ||
770 text[match[2].rm_so] == '/';
771 if (match[1].rm_eo - match[1].rm_so == 4) {
772 year = handle_year (text, match + 1);
773 month = handle_month (text, match + 3);
774 day = 1;
775 date_format = g_strdup ("yyyy/m");
776 } else if (match[3].rm_eo - match[3].rm_so == 4) {
777 month = handle_month (text, match + 1);
778 year = handle_year (text, match + 3);
779 day = 1;
780 date_format = g_strdup ("m/yyyy");
781 } else if (good_ddmmsep && month_before_day) {
782 month = handle_month (text, match + 1);
783 day = handle_day (text, match + 3);
784 year = current_year ();
785 date_format = gnm_format_frob_slashes ("m/d/yyyy");
786 } else if (good_ddmmsep) {
787 month = handle_month (text, match + 3);
788 day = handle_day (text, match + 1);
789 year = current_year ();
790 date_format = gnm_format_frob_slashes ("d/m/yyyy");
791 } else
792 year = month = day = -1;
793 if (valid_dmy (day, month, year)) {
794 text += match[0].rm_eo;
795 goto got_date;
799 g_free (date_format);
800 return NULL;
802 got_date:
803 g_date_clear (&date, 1);
804 g_date_set_dmy (&date, day, month, year);
805 if (!g_date_valid (&date))
806 goto out;
807 date_val = go_date_g_to_serial (&date, date_conv);
809 SKIP_SPACES (text);
811 if (*text) {
812 GnmValue *v = format_match_time (text, FALSE,
813 TRUE, add_format);
814 GOFormat const *fmt;
815 if (!v)
816 goto out;
817 time_val = value_get_as_float (v);
818 fmt = VALUE_FMT (v);
819 if (fmt)
820 time_format = g_strdup (go_format_as_XL (fmt));
821 value_release (v);
822 } else
823 time_val = 0;
825 res = value_new_float (date_val + time_val);
826 if (add_format) {
827 GOFormat *fmt;
828 if (time_format) {
829 char *format = g_strconcat (date_format,
830 " ",
831 time_format,
832 NULL);
833 fmt = go_format_new_from_XL (format);
834 g_free (format);
835 } else
836 fmt = go_format_new_from_XL (date_format);
837 value_set_fmt (res, fmt);
838 go_format_unref (fmt);
841 out:
842 g_free (date_format);
843 g_free (time_format);
844 return res;
848 * Match "12/23", "-12/23", "1 2/3", "-1 2/3", and even "-123".
849 * Does not match "1/0".
851 * Spaces are allowed anywhere but between digits and between
852 * sign and digits.
854 * The number of digits in the denominator is stored in @denlen.
856 static GnmValue *
857 format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
859 char sign = 0;
860 gnm_float whole, num, den, f;
861 char const *start;
862 gunichar uc;
864 SKIP_SPACES (text);
866 uc = g_utf8_get_char (text);
867 DO_SIGN (sign, uc, { text = g_utf8_next_char (text); });
869 if (*text == 0 || !g_ascii_isdigit (*text))
870 return NULL;
872 start = text;
873 SKIP_DIGITS (text);
874 SKIP_SPACES (text);
876 if (*text == '/') {
877 if (mixed_only)
878 return NULL;
879 whole = 0;
880 } else {
881 whole = gnm_utf8_strto (start, NULL);
882 if (errno == ERANGE)
883 return NULL;
884 if (*text == 0) {
885 num = 0;
886 den = 1;
887 *denlen = 0;
888 goto done;
889 } else if (!g_ascii_isdigit (*text))
890 return NULL;
892 start = text;
893 SKIP_DIGITS (text);
894 SKIP_SPACES (text);
896 if (*text != '/')
897 return NULL;
900 num = gnm_utf8_strto (start, NULL);
901 if (errno == ERANGE)
902 return NULL;
904 text++;
905 SKIP_SPACES (text);
906 start = text;
907 SKIP_DIGITS (text);
908 *denlen = text - start;
909 SKIP_SPACES (text);
911 if (*text != 0)
912 return NULL;
914 den = gnm_utf8_strto (start, NULL);
915 if (errno == ERANGE)
916 return NULL;
917 if (den == 0)
918 return NULL;
920 done:
921 f = whole + num / den;
922 if (sign == '-')
923 f = -f;
925 return value_new_float (f);
929 GnmValue *
930 format_match_decimal_number_with_locale (char const *text, GOFormatFamily *family,
931 GString const *curr, GString const *thousand,
932 GString const *decimal)
934 gboolean par_open = FALSE;
935 gboolean par_close = FALSE;
936 gboolean has_curr = FALSE;
937 gboolean has_percent = FALSE;
938 char sign = 0;
939 GString *numstr = g_string_sized_new (20);
940 gboolean last_was_digit = FALSE;
941 gboolean allow1000 = (thousand != NULL) && (thousand->len != 0);
943 g_return_val_if_fail (curr != NULL, NULL);
944 g_return_val_if_fail (decimal != NULL, NULL);
946 while (*text) {
947 gunichar uc = g_utf8_get_char (text);
949 if (!has_curr && strncmp (curr->str, text, curr->len) == 0) {
950 has_curr = TRUE;
951 text += curr->len;
952 continue;
955 if (g_unichar_isspace (uc)) {
956 text = g_utf8_next_char (text);
957 continue;
960 if (!sign) {
961 DO_SIGN (sign, uc, {
962 g_string_append_c (numstr, sign);
963 text = g_utf8_next_char (text);
964 continue;
968 if (!par_open && !sign && uc == '(') {
969 sign = '-';
970 g_string_append_c (numstr, sign);
971 par_open = TRUE;
972 text++;
973 continue;
976 break;
979 while (*text) {
980 char c = *text;
982 if (last_was_digit &&
983 allow1000 &&
984 strncmp (thousand->str, text, thousand->len) == 0 &&
985 g_ascii_isdigit (text[thousand->len]) &&
986 g_ascii_isdigit (text[thousand->len + 1]) &&
987 g_ascii_isdigit (text[thousand->len + 2])) {
988 text += thousand->len;
989 continue;
992 if (strncmp (decimal->str, text, decimal->len) == 0) {
993 GString const *local_decimal = go_locale_get_decimal ();
994 g_string_append_len (numstr, local_decimal->str, local_decimal->len);
995 text += decimal->len;
996 allow1000 = FALSE;
997 continue;
1000 if (g_ascii_isdigit (c)) {
1001 g_string_append_c (numstr, c);
1002 text++;
1003 last_was_digit = TRUE;
1004 continue;
1006 last_was_digit = FALSE;
1008 if (c == 'e' || c == 'E') {
1009 char esign = 0;
1010 gunichar uc;
1013 * Pretend to have seen a sign so we don't accept
1014 * a "-" at the end.
1016 if (!sign)
1017 sign = '+';
1018 allow1000 = FALSE;
1020 g_string_append_c (numstr, c);
1021 text++;
1023 uc = g_utf8_get_char (text);
1024 DO_SIGN (esign, uc, {
1025 text = g_utf8_next_char (text);
1026 g_string_append_c (numstr, esign);
1029 continue;
1032 break;
1035 while (*text) {
1036 gunichar uc = g_utf8_get_char (text);
1038 if (!has_curr && strncmp (curr->str, text, curr->len) == 0) {
1039 has_curr = TRUE;
1040 text += curr->len;
1041 continue;
1044 if (g_unichar_isspace (uc)) {
1045 text = g_utf8_next_char (text);
1046 continue;
1049 if (!sign) {
1050 DO_SIGN (sign, uc, {
1051 g_string_prepend_c (numstr, sign);
1052 text = g_utf8_next_char (text);
1053 continue;
1057 if (!par_close && par_open && uc == ')') {
1058 par_close = TRUE;
1059 text++;
1060 continue;
1063 if (!has_percent && uc == '%') {
1064 has_percent = TRUE;
1065 text++;
1066 continue;
1069 break;
1072 if (*text ||
1073 numstr->len == 0 ||
1074 par_open != par_close ||
1075 (has_percent && (par_open || has_curr))) {
1076 g_string_free (numstr, TRUE);
1077 return NULL;
1078 } else {
1079 gnm_float f;
1080 char *end;
1081 gboolean bad;
1083 f = gnm_utf8_strto (numstr->str, &end);
1084 bad = *end || errno == ERANGE;
1085 g_string_free (numstr, TRUE);
1087 if (bad)
1088 return NULL;
1090 if (par_open)
1091 *family = GO_FORMAT_ACCOUNTING;
1092 else if (has_curr)
1093 *family = GO_FORMAT_CURRENCY;
1094 else if (has_percent)
1095 *family = GO_FORMAT_PERCENTAGE;
1096 else
1097 *family = GO_FORMAT_GENERAL;
1099 if (has_percent)
1100 f /= 100;
1102 return value_new_float (f);
1106 #undef DO_SIGN
1107 #undef SKIP_SPACES
1108 #undef SKIP_DIGITS
1110 static void
1111 set_money_format (GnmValue *v, const char *fmttxt)
1113 gnm_float f = value_get_as_float (v);
1115 if (fmttxt) {
1116 GOFormat *fmt = go_format_new_from_XL (fmttxt);
1117 value_set_fmt (v, fmt);
1118 go_format_unref (fmt);
1119 } else
1120 value_set_fmt (v, go_format_default_money ());
1122 if (f != gnm_floor (f)) {
1123 int i;
1124 for (i = 0; i < 2; i++) {
1125 GOFormat *fmt =
1126 go_format_inc_precision (VALUE_FMT (v));
1127 value_set_fmt (v, fmt);
1128 go_format_unref (fmt);
1134 * Major alternate currencies to try after the locale's currency.
1135 * We do not want three-letter currency codes in here.
1137 static const struct {
1138 const char *sym;
1139 const char *fmt;
1140 } alternate_currencies[] = {
1141 { "€", "[$€-2]0" },
1142 { "£", "£0" },
1143 { "¥", "¥0" },
1144 { "$", "$0" }
1147 static GnmValue *
1148 format_match_decimal_number (char const *text, GOFormatFamily *family,
1149 gboolean try_alternates)
1151 GString const *curr = go_locale_get_currency (NULL, NULL);
1152 GString const *thousand = go_locale_get_thousand ();
1153 GString const *decimal = go_locale_get_decimal ();
1154 GnmValue *v;
1155 unsigned ui;
1157 v = format_match_decimal_number_with_locale (text, family, curr, thousand, decimal);
1158 for (ui = 0;
1159 try_alternates && v == NULL && ui < G_N_ELEMENTS (alternate_currencies);
1160 ui++) {
1161 const char *sym = alternate_currencies[ui].sym;
1162 if (strstr (text, sym) == 0)
1163 continue;
1164 else {
1165 GString *altcurr = g_string_new (sym);
1166 v = format_match_decimal_number_with_locale
1167 (text, family, altcurr, thousand, decimal);
1168 g_string_free (altcurr, TRUE);
1169 if (v)
1170 set_money_format (v, alternate_currencies[ui].fmt);
1174 return v;
1178 * format_match:
1179 * @text: The text to parse
1180 * @cur_fmt: The current format for the value (potentially NULL)
1181 * @date_conv: optional date convention
1183 * Attempts to parse the supplied string to see if it matches a known value
1184 * format. The caller is responsible for releasing the resulting value.
1186 GnmValue *
1187 format_match (char const *text, GOFormat const *cur_fmt,
1188 GODateConventions const *date_conv)
1190 GOFormatFamily fam;
1191 GnmValue *v;
1192 int denlen;
1194 if (text[0] == '\0')
1195 return value_new_empty ();
1197 /* If it begins with a '\'' it is a string */
1198 if (text[0] == '\'')
1199 return value_new_string (text + 1);
1201 fam = cur_fmt ? go_format_get_family (cur_fmt) : GO_FORMAT_GENERAL;
1202 switch (fam) {
1203 case GO_FORMAT_TEXT:
1204 return value_new_string (text);
1206 case GO_FORMAT_NUMBER:
1207 case GO_FORMAT_CURRENCY:
1208 case GO_FORMAT_ACCOUNTING:
1209 case GO_FORMAT_PERCENTAGE:
1210 case GO_FORMAT_SCIENTIFIC:
1211 v = format_match_decimal_number (text, &fam, FALSE);
1212 if (!v)
1213 v = value_is_error (text);
1214 if (v)
1215 value_set_fmt (v, cur_fmt);
1216 return v;
1218 case GO_FORMAT_DATE: {
1219 gboolean month_before_day =
1220 gnm_format_month_before_day (cur_fmt, NULL) != 0;
1222 v = format_match_datetime (text, date_conv,
1223 month_before_day,
1224 FALSE,
1225 TRUE);
1226 if (!v)
1227 v = format_match_decimal_number (text, &fam, FALSE);
1228 if (!v)
1229 v = value_is_error (text);
1230 if (v)
1231 value_set_fmt (v, cur_fmt);
1232 return v;
1235 case GO_FORMAT_TIME: {
1236 gboolean month_before_day =
1237 gnm_format_month_before_day (cur_fmt, NULL) != 0;
1239 gboolean prefer_hour =
1240 gnm_format_has_hour (cur_fmt, NULL);
1242 v = format_match_datetime (text, date_conv,
1243 month_before_day,
1244 FALSE,
1245 FALSE);
1246 if (!v)
1247 v = format_match_time (text, TRUE, prefer_hour, FALSE);
1248 if (!v)
1249 v = format_match_decimal_number (text, &fam, FALSE);
1250 if (!v)
1251 v = value_is_error (text);
1252 if (v)
1253 value_set_fmt (v, cur_fmt);
1254 return v;
1257 case GO_FORMAT_FRACTION:
1258 v = format_match_fraction (text, &denlen, FALSE);
1259 if (!v)
1260 v = format_match_decimal_number (text, &fam, FALSE);
1261 if (!v)
1262 v = value_is_error (text);
1263 if (v)
1264 value_set_fmt (v, cur_fmt);
1265 return v;
1267 default:
1268 ; /* Nothing */
1271 /* Check basic types */
1272 v = format_match_simple (text);
1273 if (v != NULL)
1274 return v;
1276 v = format_match_decimal_number (text, &fam, TRUE);
1277 if (v) {
1278 switch (fam) {
1279 case GO_FORMAT_PERCENTAGE:
1280 value_set_fmt (v, go_format_default_percentage ());
1281 break;
1282 case GO_FORMAT_CURRENCY:
1283 if (!VALUE_FMT (v))
1284 set_money_format (v, NULL);
1285 break;
1286 case GO_FORMAT_ACCOUNTING:
1287 value_set_fmt (v, go_format_default_accounting ());
1288 break;
1289 default:
1290 ; /* Nothing */
1293 return v;
1296 v = format_match_datetime (text, date_conv,
1297 go_locale_month_before_day () != 0,
1298 TRUE,
1299 FALSE);
1300 if (v)
1301 return v;
1303 v = format_match_time (text, TRUE, TRUE, TRUE);
1304 if (v)
1305 return v;
1307 v = format_match_fraction (text, &denlen, TRUE);
1308 if (v) {
1309 char fmtstr[20];
1310 char const *qqq = "?????" + 5;
1311 GOFormat *fmt;
1313 denlen = MIN (denlen, 5);
1314 sprintf (fmtstr, "# %s/%s", qqq - denlen, qqq - denlen);
1315 fmt = go_format_new_from_XL (fmtstr);
1316 value_set_fmt (v, fmt);
1317 go_format_unref (fmt);
1318 return v;
1321 return NULL;
1325 * format_match_number:
1326 * @text: The text to parse
1327 * @cur_fmt: The current format for the value (potentially NULL)
1328 * @date_conv: optional date convention
1330 * Attempts to parse the supplied string to see if it matches a known value format.
1331 * Will eventually use the current cell format in preference to canned formats.
1332 * If @format is supplied it will get a copy of the matching format with no
1333 * additional references. The caller is responsible for releasing the
1334 * resulting value. Will ONLY return numbers.
1336 GnmValue *
1337 format_match_number (char const *text, GOFormat const *cur_fmt,
1338 GODateConventions const *date_conv)
1340 GnmValue *res = format_match (text, cur_fmt, date_conv);
1342 if (res != NULL) {
1343 if (VALUE_IS_NUMBER (res))
1344 return res;
1345 value_release (res);
1347 return NULL;