Compilation: fix warning.
[gnumeric.git] / src / number-match.c
blobcfa9d8b43e71b9dd2c2d1d82a42bdb52f865610c
1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3 * number-match.c: This file includes the support for matching
4 * entered strings as numbers (by trying to apply one of the existing
5 * cell formats).
7 * The idea is simple: we create a regular expression from the format
8 * string that would match a value entered in that format. Then, on
9 * lookup we try to match the string against every regular expression
10 * we have: if a match is found, then we decode the number using a
11 * precomputed parallel-list of subexpressions.
13 * Authors:
14 * Morten Welinder (terra@gnome.org)
15 * Miguel de Icaza (miguel@gnu.org)
17 #include <gnumeric-config.h>
18 #include <glib/gi18n-lib.h>
19 #include "gnumeric.h"
20 #include "number-match.h"
22 #include "gutils.h"
23 #include "style.h"
24 #include "gnm-format.h"
25 #include "value.h"
26 #include "mathfunc.h"
27 #include "numbers.h"
28 #include "gnm-datetime.h"
29 #include <goffice/goffice.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <errno.h>
34 #include <locale.h>
35 #include <math.h>
36 #include <time.h>
37 #undef DEBUG_NUMBER_MATCH
40 * value_is_error: Check to see if a string begins with one of the magic
41 * error strings.
43 * @str: The string to test
45 * Returns: an error if there is one, or %NULL.
47 static GnmValue *
48 value_is_error (char const *str)
50 GnmStdError e;
52 if (str[0] != '#')
53 return NULL;
55 for (e = (GnmStdError)0; e < GNM_ERROR_UNKNOWN; e++)
56 if (0 == strcmp (str, value_error_name (e, TRUE)))
57 return value_new_error_std (NULL, e);
59 return NULL;
62 /**
63 * format_match_simple:
64 * @text: A String to match against.
66 * Attempt to match the supplied string as a simple value.
68 * WARNING WARNING WARNING : This routine should NEVER be changed to match
69 * VALUE_STRING that will break the parsers
70 * handling of named expressions.
72 GnmValue *
73 format_match_simple (char const *text)
75 /* Is it a boolean? */
76 if (0 == g_ascii_strcasecmp (text, go_locale_boolean_name (TRUE)))
77 return value_new_bool (TRUE);
78 if (0 == g_ascii_strcasecmp (text, go_locale_boolean_name (FALSE)))
79 return value_new_bool (FALSE);
81 /* Is it an error? */
83 GnmValue *err = value_is_error (text);
84 if (err != NULL)
85 return err;
88 /* Is it a floating-point number */
90 char *end;
91 gnm_float d;
93 d = gnm_utf8_strto (text, &end);
94 if (text != end && errno != ERANGE && gnm_finite (d)) {
95 /* Allow and ignore spaces at the end. */
96 while (g_ascii_isspace (*end))
97 end++;
98 if (*end == '\0')
99 return value_new_float (d);
103 return NULL;
106 static struct {
107 char *lc_time;
108 GORegexp re_MMMMddyyyy;
109 GORegexp re_ddMMMMyyyy;
110 GORegexp re_yyyymmdd1;
111 GORegexp re_yyyymmdd2;
112 GORegexp re_mmddyyyy;
113 GORegexp re_mmdd;
114 GORegexp re_hhmmss1;
115 GORegexp re_hhmmss2;
116 GORegexp re_hhmmssds;
117 GORegexp re_hhmmss_ampm;
118 } datetime_locale;
121 static void
122 datetime_locale_clear (void)
124 g_free (datetime_locale.lc_time);
125 go_regfree (&datetime_locale.re_MMMMddyyyy);
126 go_regfree (&datetime_locale.re_ddMMMMyyyy);
127 go_regfree (&datetime_locale.re_yyyymmdd1);
128 go_regfree (&datetime_locale.re_yyyymmdd2);
129 go_regfree (&datetime_locale.re_mmddyyyy);
130 go_regfree (&datetime_locale.re_mmdd);
131 go_regfree (&datetime_locale.re_hhmmss1);
132 go_regfree (&datetime_locale.re_hhmmss2);
133 go_regfree (&datetime_locale.re_hhmmssds);
134 go_regfree (&datetime_locale.re_hhmmss_ampm);
135 memset (&datetime_locale, 0, sizeof (datetime_locale));
138 static char const *
139 my_regerror (int err, GORegexp const *preg)
141 static char buffer[1024];
142 go_regerror (err, preg, buffer, sizeof (buffer));
143 return buffer;
146 static void
147 datetime_locale_setup1 (GORegexp *rx, char const *pat)
149 int ret = go_regcomp (rx, pat, GO_REG_ICASE);
150 if (ret) {
151 g_warning ("Failed to compile rx \"%s\": %s\n",
152 pat,
153 my_regerror (ret, rx));
158 static void
159 datetime_locale_setup (char const *lc_time)
161 GString *p_MMMM = g_string_sized_new (200);
162 GString *p_MMM = g_string_sized_new (200);
163 GString *p_decimal = g_string_sized_new (10);
164 char *s;
165 int m;
167 datetime_locale.lc_time = g_strdup (lc_time);
169 for (m = 1; m <= 12; m++) {
170 if (m != 1)
171 g_string_append_c (p_MMMM, '|');
172 g_string_append_c (p_MMMM, '(');
173 s = go_date_month_name (m, FALSE);
174 go_regexp_quote (p_MMMM, s);
175 g_free (s);
176 g_string_append_c (p_MMMM, ')');
178 if (m != 1)
179 g_string_append_c (p_MMM, '|');
180 g_string_append_c (p_MMM, '(');
181 s = go_date_month_name (m, TRUE);
182 go_regexp_quote (p_MMM, s);
183 /* nb_NO actually adds a "." for these abbreviations. */
184 if (g_unichar_ispunct (g_utf8_get_char (g_utf8_prev_char (p_MMM->str + p_MMM->len))))
185 g_string_append_c (p_MMM, '?');
186 g_free (s);
187 g_string_append_c (p_MMM, ')');
190 go_regexp_quote (p_decimal, go_locale_get_decimal ()->str);
193 * "Dec 1, 2000"
194 * "Dec/1/04"
195 * "December 1, 2000"
196 * "December 1 2000"
197 * "Dec-1-2000"
198 * "Dec 1"
199 * "Dec/1"
200 * "December 1"
201 * "Dec-1"
202 * "Jan 2010"
203 * "January 2010"
205 s = g_strconcat ("^(",
206 p_MMMM->str,
207 "|",
208 p_MMM->str,
209 ")(-|/|\\s)(\\d+)((,?\\s+|-|/)(\\d+))?\\b",
210 NULL);
211 datetime_locale_setup1 (&datetime_locale.re_MMMMddyyyy, s);
212 g_free (s);
215 * "1-Dec-2000"
216 * "1/Dec/04"
217 * "1-December-2000"
218 * "1. december 2000"
219 * "1. december, 2000"
220 * "1-Dec"
221 * "1/Dec"
222 * "1-December"
223 * "1. december"
225 s = g_strconcat ("^(\\d+)(-|/|\\.?\\s*)(",
226 p_MMMM->str,
227 "|",
228 p_MMM->str,
229 ")((,?\\s*|-|/)(\\d+))?\\b",
230 NULL);
231 datetime_locale_setup1 (&datetime_locale.re_ddMMMMyyyy, s);
232 g_free (s);
235 * "20001231"
236 * (with special support for 20001231:123456)
238 s = g_strconcat ("^(\\d\\d\\d\\d)(\\d\\d)(\\d\\d)(:\\d\\d\\d\\d\\d\\d(",
239 p_decimal->str,
240 "\\d*)?)?\\s*$",
241 NULL);
242 datetime_locale_setup1 (&datetime_locale.re_yyyymmdd1, s);
243 g_free (s);
246 * "1900/01/01"
247 * "1900-1-1"
249 datetime_locale_setup1 (&datetime_locale.re_yyyymmdd2,
250 "^(\\d\\d\\d\\d)[-/.](\\d+)[-/.](\\d+)\\b");
253 * "01/31/2001" [Jan 31] if month_before_day
254 * "1/2/88" [Jan 2] if month_before_day
255 * "1/2/88" [Feb 1] if !month_before_day
256 * "31/1/2001" [Jan 31] if !month_before_day
258 datetime_locale_setup1 (&datetime_locale.re_mmddyyyy,
259 "^(\\d+)[-/.](\\d+)[-/.](\\d+)\\b");
262 * "2005/2" [Feb 1]
263 * "2/2005" [Feb 1]
264 * "01/31" [Jan 31] if month_before_day
265 * "31/1" [Jan 31] if !month_before_day
267 datetime_locale_setup1 (&datetime_locale.re_mmdd,
268 "^(\\d+)([-/.])(\\d+)\\b");
271 * "15:30:00.3"
272 * "30:00.3" [A little more than 30min]
273 * "115:30:00.3"
275 /* ^(((\d+):)?(\d+):)?(\d+.\d*)\s*$ */
276 s = g_strconcat ("^(((\\d+):)?(\\d+):)?(\\d+",
277 p_decimal->str,
278 ".\\d*)\\s*$",
279 NULL);
280 datetime_locale_setup1 (&datetime_locale.re_hhmmssds, s);
281 g_free (s);
284 * "15:30:00"
285 * "15:30" [15:30:00] if prefer_hour
286 * "15:30" [00:15:30] if !prefer_hour
288 datetime_locale_setup1 (&datetime_locale.re_hhmmss1,
289 "^(\\d+):(\\d+)(:(\\d+))?\\s*$");
292 * "153000"
293 * "153000.2"
295 s = g_strconcat ("^(\\d\\d)(\\d\\d)(\\d\\d)?(",
296 p_decimal->str,
297 "\\d*)?\\s*$",
298 NULL);
299 datetime_locale_setup1 (&datetime_locale.re_hhmmss2, s);
300 g_free (s);
303 * "12:30:01.3 am"
304 * "12:30:01 am"
305 * "12:30 am"
306 * "12am"
308 s = g_strconcat ("^(\\d+)(:(\\d+)(:(\\d+(",
309 p_decimal->str,
310 "\\d*)?))?)?\\s*((am)|(pm))\\s*$",
311 NULL);
312 datetime_locale_setup1 (&datetime_locale.re_hhmmss_ampm, s);
313 g_free (s);
315 g_string_free (p_MMMM, TRUE);
316 g_string_free (p_MMM, TRUE);
317 g_string_free (p_decimal, TRUE);
320 static int
321 find_month (GORegmatch const *pm)
323 int m;
325 for (m = 1; m <= 12; m++) {
326 if (pm->rm_so != pm->rm_eo)
327 return m;
328 pm++;
331 return -1;
334 static int
335 handle_int (char const *text, GORegmatch const *pm, int min, int max, int maxlen)
337 int i = 0;
338 char const *p = text + pm->rm_so;
339 char const *end = text + pm->rm_eo;
340 int len = 0;
342 while (p != end) {
343 gunichar uc = g_utf8_get_char (p);
344 p = g_utf8_next_char (p);
345 i = (10 * i) + g_unichar_digit_value (uc);
346 len++;
348 if (i > max || len > maxlen)
349 return -1;
352 if (i >= min)
353 return i;
354 else
355 return -1;
358 static int
359 handle_day (char const *text, GORegmatch const *pm)
361 return handle_int (text, pm, 1, 31, 2);
364 static int
365 handle_month (char const *text, GORegmatch const *pm)
367 return handle_int (text, pm, 1, 12, 2);
/*
 * Return the current calendar year in local time.
 *
 * Returns -1 if the local time cannot be determined; callers feed the
 * result to valid_dmy(), which rejects negative years.
 */
static int
current_year (void)
{
	time_t now = time (NULL);
	struct tm *tm = localtime (&now);

	/* localtime() may return NULL; do not dereference it blindly. */
	if (tm == NULL)
		return -1;

	/* tm_year counts years since 1900. */
	return 1900 + tm->tm_year;
}
378 static int
379 handle_year (char const *text, GORegmatch const *pm)
381 int y;
383 if (pm->rm_so == pm->rm_eo)
384 return current_year ();
386 y = handle_int (text, pm, 0, 9999, 4);
388 if (y < 0)
389 return -1;
390 else if (y <= 29)
391 return 2000 + y;
392 else if (y <= 99)
393 return 1900 + y;
394 else if (y < (gnm_datetime_allow_negative () ? 1582 : 1900))
395 return -1;
396 else
397 return y;
401 static gnm_float
402 handle_float (char const *text, GORegmatch const *pm)
404 gnm_float val = 0;
405 char const *p;
406 char const *end;
407 gnm_float num = 10;
409 /* Empty means zero. */
410 if (pm->rm_so == pm->rm_eo)
411 return 0;
413 p = text + pm->rm_so;
414 end = text + pm->rm_eo;
415 while (p != end) {
416 gunichar uc = g_utf8_get_char (p);
417 int d = g_unichar_digit_value (uc);
418 p = g_utf8_next_char (p);
419 if (d < 0) break; /* Must be decimal sep. */
420 val = (10 * val) + d;
423 while (p != end) {
424 gunichar uc = g_utf8_get_char (p);
425 int d = g_unichar_digit_value (uc);
426 p = g_utf8_next_char (p);
427 val += d / num;
428 num *= 10;
431 return val;
434 static void
435 fixup_hour_ampm (gnm_float *hour, const GORegmatch *pm)
437 gboolean is_am = (pm->rm_so != pm->rm_eo);
439 if (*hour < 1 || *hour > 12) {
440 *hour = -1;
441 return;
444 if (*hour == 12)
445 *hour = 0;
446 if (!is_am)
447 *hour += 12;
450 static gboolean
451 valid_hms (gnm_float h, gnm_float m, gnm_float s,
452 gboolean allow_elapsed, char *elapsed)
454 gboolean h_ok = h >= 0 && h < 24;
455 gboolean m_ok = m >= 0 && m < 60;
456 gboolean s_ok = s >= 0 && s < 60;
458 /* Boring old clock time. */
459 if (h_ok && m_ok && s_ok) {
460 if (elapsed)
461 *elapsed = 0;
462 return TRUE;
465 if (!allow_elapsed)
466 return FALSE;
468 if (*elapsed == 'h' && m_ok && s_ok)
469 return TRUE;
471 if (*elapsed == 'm' && h == 0 && s_ok)
472 return TRUE;
474 if (*elapsed == 's' && h == 0 && m == 0)
475 return TRUE;
477 return FALSE;
/*
 * DO_SIGN: if @uc is a minus sign (ASCII or UNICODE_MINUS_SIGN_C) or a
 * plus sign, store '-' or '+' in @sign and run @action.
 *
 * This is deliberately a plain brace block and not do { } while (0):
 * callers pass actions containing "continue", which must apply to the
 * caller's own loop.
 */
#define DO_SIGN(sign,uc,action)						\
	{								\
		if ((uc) == '-' || (uc) == UNICODE_MINUS_SIGN_C) {	\
			sign = '-';					\
			action;						\
		} else if ((uc) == '+') {				\
			sign = '+';					\
			action;						\
		}							\
	}

/* Advance @text past a run of ASCII digits. */
#define SKIP_DIGITS(text) while (g_ascii_isdigit (*(text))) (text)++

/* Advance @text past a run of Unicode space characters. */
#define SKIP_SPACES(text)						\
	while (*(text) && g_unichar_isspace (g_utf8_get_char (text)))	\
		(text) = g_utf8_next_char (text)
498 static GnmValue *
499 format_match_time (char const *text, gboolean allow_elapsed,
500 gboolean prefer_hour, gboolean add_format)
502 char sign = 0;
503 gunichar uc;
504 gnm_float hour, minute, second;
505 gnm_float time_val;
506 GORegmatch match[10];
507 char const *time_format = NULL;
508 GnmValue *v;
510 SKIP_SPACES (text);
512 /* AM/PM means hour is needed. No sign allowed. */
513 /* ^(\d+)(:(\d+)(:(\d+(.\d*)?))?)?\s*((am)|(pm))\s*$ */
514 /* 1 2 3 4 5 6 78 9 */
515 if (go_regexec (&datetime_locale.re_hhmmss_ampm, text, G_N_ELEMENTS (match), match, 0) == 0) {
516 hour = handle_float (text, match + 1);
517 fixup_hour_ampm (&hour, match + 8);
518 minute = handle_float (text, match + 3);
519 second = handle_float (text, match + 5);
520 if (valid_hms (hour, minute, second, FALSE, NULL)) {
521 time_format = "h:mm:ss AM/PM";
522 goto got_time;
526 uc = g_utf8_get_char (text);
527 if (allow_elapsed) {
528 DO_SIGN (sign, uc, {
529 text = g_utf8_next_char (text);
533 /* If fractional seconds are present, we know the layout. */
534 /* ^(((\d+):)?(\d+):)?(\d+.\d*)\s*$ */
535 /* 123 4 5 */
536 if (go_regexec (&datetime_locale.re_hhmmssds, text, G_N_ELEMENTS (match), match, 0) == 0) {
537 char elapsed =
538 match[3].rm_so != match[3].rm_eo
539 ? 'h'
540 : (match[4].rm_so != match[4].rm_eo
541 ? 'm'
542 : 's');
544 hour = handle_float (text, match + 3);
545 minute = handle_float (text, match + 4);
546 second = handle_float (text, match + 5);
548 if (valid_hms (hour, minute, second, allow_elapsed, &elapsed)) {
549 time_format = elapsed ? "[h]:mm:ss" : "h:mm:ss";
550 goto got_time;
554 /* ^(\d+):(\d+)(:(\d+))?\s*$ */
555 /* 1 2 3 4 */
556 if (go_regexec (&datetime_locale.re_hhmmss1, text, G_N_ELEMENTS (match), match, 0) == 0) {
557 gboolean has_all = (match[4].rm_so != match[4].rm_eo);
558 char elapsed;
559 const char *time_format_elapsed;
561 if (prefer_hour || has_all) {
562 hour = handle_float (text, match + 1);
563 minute = handle_float (text, match + 2);
564 second = handle_float (text, match + 4);
565 time_format = has_all ? "h:mm:ss" : "h:mm";
566 time_format_elapsed = has_all ? "[h]:mm:ss" : "[h]:mm";
567 elapsed = 'h';
568 } else {
569 hour = 0;
570 minute = handle_float (text, match + 1);
571 second = handle_float (text, match + 2);
572 time_format = "mm:ss";
573 time_format_elapsed = "[m]:ss";
574 elapsed = 'm';
577 if (valid_hms (hour, minute, second, allow_elapsed, &elapsed)) {
578 if (elapsed)
579 time_format = time_format_elapsed;
580 goto got_time;
584 /* ^(\d\d)(\d\d)(\d\d)?(\.\d*)?\s*$ */
585 /* 1 2 3 4 */
586 if (go_regexec (&datetime_locale.re_hhmmss2, text, G_N_ELEMENTS (match), match, 0) == 0) {
587 gboolean has3 = (match[3].rm_so != match[3].rm_eo);
588 gboolean hasfrac = (match[4].rm_so != match[4].rm_eo);
589 char elapsed;
590 const char *time_format_elapsed;
592 if ((prefer_hour && !hasfrac) || has3) {
593 hour = handle_float (text, match + 1);
594 minute = handle_float (text, match + 2);
595 second = handle_float (text, match + 3) + handle_float (text, match + 4);
596 time_format = "h:mm:ss";
597 time_format_elapsed = "[h]:mm:ss";
598 elapsed = 'h';
599 } else {
600 hour = 0;
601 minute = handle_float (text, match + 1);
602 second = handle_float (text, match + 2) + handle_float (text, match + 4);
603 time_format = "mm:ss";
604 time_format_elapsed = "[m]:ss";
605 elapsed = 'm';
608 if (valid_hms (hour, minute, second, allow_elapsed, &elapsed)) {
609 if (elapsed)
610 time_format = time_format_elapsed;
611 goto got_time;
615 return NULL;
617 got_time:
618 time_val = (second + 60 * (minute + 60 * hour)) / (24 * 60 * 60);
619 if (sign == '-')
620 time_val = 0 - time_val;
621 v = value_new_float (time_val);
623 if (add_format) {
624 GOFormat *fmt = go_format_new_from_XL (time_format);
625 value_set_fmt (v, fmt);
626 go_format_unref (fmt);
629 return v;
632 static gboolean
633 valid_dmy (int d, int m, int y)
635 /* Avoid sign-induced problem. d and m are capped. */
636 return y >= 0 && g_date_valid_dmy (d, m, y);
640 GnmValue *
641 format_match_datetime (char const *text,
642 GODateConventions const *date_conv,
643 gboolean month_before_day,
644 gboolean add_format,
645 gboolean presume_date)
647 int day, month, year;
648 GDate date;
649 gnm_float time_val, date_val;
650 char const *lc_time = setlocale (LC_TIME, NULL);
651 GORegmatch match[31];
652 gunichar uc;
653 int dig1;
654 char *date_format = NULL;
655 GnmValue *res = NULL;
656 char *time_format = NULL;
658 if (lc_time != datetime_locale.lc_time &&
659 (lc_time == NULL ||
660 datetime_locale.lc_time == NULL ||
661 strcmp (lc_time, datetime_locale.lc_time))) {
662 datetime_locale_clear ();
663 datetime_locale_setup (lc_time);
666 SKIP_SPACES (text);
667 uc = g_utf8_get_char (text);
668 dig1 = g_unichar_digit_value (uc);
670 /* ^(MMMM)(-|/|\s)(\d+)((,\s+|-|/)(\d+))?\b */
671 /* 1 26 27 28 30 */
672 /* 29 */
673 if (dig1 < 0 &&
674 go_regexec (&datetime_locale.re_MMMMddyyyy, text, G_N_ELEMENTS (match), match, 0) == 0) {
675 month = find_month (&match[2]);
676 if (month == -1) month = find_month (&match[2 + 12]);
677 day = handle_day (text, match + 27);
678 if (day == -1 &&
679 match[27].rm_eo - match[27].rm_so >= 4 &&
680 match[28].rm_so == match[28].rm_eo) {
681 /* Only one number with 4+ digits -- might be a year. */
682 year = handle_year (text, match + 27);
683 day = 1;
684 } else {
685 year = handle_year (text, match + 30);
687 if (valid_dmy (day, month, year)) {
688 date_format = gnm_format_frob_slashes ("mmm/dd/yyyy");
689 text += match[0].rm_eo;
690 goto got_date;
694 /* ^(\d+)(-|/|\.?\s*)(MMMM)((,?\s*|-|/)(\d+))?\b */
695 /* 1 2 3 28 30 */
696 /* 29 */
697 if (dig1 >= 0 &&
698 go_regexec (&datetime_locale.re_ddMMMMyyyy, text, G_N_ELEMENTS (match), match, 0) == 0) {
699 day = handle_day (text, match + 1);
700 month = find_month (&match[4]);
701 if (month == -1) month = find_month (&match[4 + 12]);
702 year = handle_year (text, match + 30);
703 if (valid_dmy (day, month, year)) {
704 date_format = g_strdup ("d-mmm-yyyy");
705 text += match[0].rm_eo;
706 goto got_date;
710 /* ^(\d\d\d\d)(\d\d)(\d\d)(:\d\d\d\d\d\d(\.\d*)?)?\s*$ */
711 /* 1 2 3 4 5 */
712 if (dig1 > 0 && /* Exclude zero. */
713 go_regexec (&datetime_locale.re_yyyymmdd1, text, G_N_ELEMENTS (match), match, 0) == 0) {
714 year = handle_year (text, match + 1);
715 month = handle_month (text, match + 2);
716 day = handle_day (text, match + 3);
717 if (valid_dmy (day, month, year)) {
718 date_format = g_strdup ("yyyy-mmm-dd");
719 text += match[3].rm_eo;
720 if (*text == ':')
721 text++;
722 goto got_date;
726 /* ^(\d\d\d\d)[-/.](\d\d)[-/.](\d\d)\b */
727 /* 1 2 3 */
728 if (dig1 > 0 && /* Exclude zero. */
729 go_regexec (&datetime_locale.re_yyyymmdd2, text, G_N_ELEMENTS (match), match, 0) == 0) {
730 year = handle_year (text, match + 1);
731 month = handle_month (text, match + 2);
732 day = handle_day (text, match + 3);
733 if (valid_dmy (day, month, year)) {
734 date_format = g_strdup ("yyyy-mmm-dd");
735 text += match[0].rm_eo;
736 goto got_date;
740 /* ^(\d+)[-/.](\d+)[-/.](\d+)\b */
741 /* 1 2 3 */
742 if (dig1 >= 0 &&
743 go_regexec (&datetime_locale.re_mmddyyyy, text, G_N_ELEMENTS (match), match, 0) == 0) {
744 if (month_before_day) {
745 month = handle_month (text, match + 1);
746 day = handle_day (text, match + 2);
747 } else {
748 month = handle_month (text, match + 2);
749 day = handle_day (text, match + 1);
751 year = handle_year (text, match + 3);
752 if (valid_dmy (day, month, year)) {
753 date_format = gnm_format_frob_slashes (month_before_day
754 ? "m/d/yyyy"
755 : "d/m/yyyy");
756 text += match[0].rm_eo;
757 goto got_date;
761 /* ^(\d+)([-/.])(\d+)\b */
762 /* 1 2 3 */
763 if (dig1 >= 0 &&
764 go_regexec (&datetime_locale.re_mmdd, text, G_N_ELEMENTS (match), match, 0) == 0) {
766 * Unless we already have a date format, do not accept
767 * 1-10, for example. See bug 376090.
769 gboolean good_ddmmsep =
770 presume_date ||
771 text[match[2].rm_so] == '/';
772 if (match[1].rm_eo - match[1].rm_so == 4) {
773 year = handle_year (text, match + 1);
774 month = handle_month (text, match + 3);
775 day = 1;
776 date_format = g_strdup ("yyyy/m");
777 } else if (match[3].rm_eo - match[3].rm_so == 4) {
778 month = handle_month (text, match + 1);
779 year = handle_year (text, match + 3);
780 day = 1;
781 date_format = g_strdup ("m/yyyy");
782 } else if (good_ddmmsep && month_before_day) {
783 month = handle_month (text, match + 1);
784 day = handle_day (text, match + 3);
785 year = current_year ();
786 date_format = gnm_format_frob_slashes ("m/d/yyyy");
787 } else if (good_ddmmsep) {
788 month = handle_month (text, match + 3);
789 day = handle_day (text, match + 1);
790 year = current_year ();
791 date_format = gnm_format_frob_slashes ("d/m/yyyy");
792 } else
793 year = month = day = -1;
794 if (valid_dmy (day, month, year)) {
795 text += match[0].rm_eo;
796 goto got_date;
800 g_free (date_format);
801 return NULL;
803 got_date:
804 g_date_clear (&date, 1);
805 g_date_set_dmy (&date, day, month, year);
806 if (!g_date_valid (&date))
807 goto out;
808 date_val = go_date_g_to_serial (&date, date_conv);
810 SKIP_SPACES (text);
812 if (*text) {
813 GnmValue *v = format_match_time (text, FALSE,
814 TRUE, add_format);
815 GOFormat const *fmt;
816 if (!v)
817 goto out;
818 time_val = value_get_as_float (v);
819 fmt = VALUE_FMT (v);
820 if (fmt)
821 time_format = g_strdup (go_format_as_XL (fmt));
822 value_release (v);
823 } else
824 time_val = 0;
826 res = value_new_float (date_val + time_val);
827 if (add_format) {
828 GOFormat *fmt;
829 if (time_format) {
830 char *format = g_strconcat (date_format,
831 " ",
832 time_format,
833 NULL);
834 fmt = go_format_new_from_XL (format);
835 g_free (format);
836 } else
837 fmt = go_format_new_from_XL (date_format);
838 value_set_fmt (res, fmt);
839 go_format_unref (fmt);
842 out:
843 g_free (date_format);
844 g_free (time_format);
845 return res;
849 * Match "12/23", "-12/23", "1 2/3", "-1 2/3", and even "-123".
850 * Does not match "1/0".
852 * Spaces are allowed anywhere but between digits and between
853 * sign and digits.
855 * The number of digits in the denominator is stored in @denlen.
857 static GnmValue *
858 format_match_fraction (char const *text, int *denlen, gboolean mixed_only)
860 char sign = 0;
861 gnm_float whole, num, den, f;
862 char const *start;
863 gunichar uc;
865 SKIP_SPACES (text);
867 uc = g_utf8_get_char (text);
868 DO_SIGN (sign, uc, { text = g_utf8_next_char (text); });
870 if (*text == 0 || !g_ascii_isdigit (*text))
871 return NULL;
873 start = text;
874 SKIP_DIGITS (text);
875 SKIP_SPACES (text);
877 if (*text == '/') {
878 if (mixed_only)
879 return NULL;
880 whole = 0;
881 } else {
882 whole = gnm_utf8_strto (start, NULL);
883 if (errno == ERANGE)
884 return NULL;
885 if (*text == 0) {
886 num = 0;
887 den = 1;
888 *denlen = 0;
889 goto done;
890 } else if (!g_ascii_isdigit (*text))
891 return NULL;
893 start = text;
894 SKIP_DIGITS (text);
895 SKIP_SPACES (text);
897 if (*text != '/')
898 return NULL;
901 num = gnm_utf8_strto (start, NULL);
902 if (errno == ERANGE)
903 return NULL;
905 text++;
906 SKIP_SPACES (text);
907 start = text;
908 SKIP_DIGITS (text);
909 *denlen = text - start;
910 SKIP_SPACES (text);
912 if (*text != 0)
913 return NULL;
915 den = gnm_utf8_strto (start, NULL);
916 if (errno == ERANGE)
917 return NULL;
918 if (den == 0)
919 return NULL;
921 done:
922 f = whole + num / den;
923 if (sign == '-')
924 f = -f;
926 return value_new_float (f);
930 GnmValue *
931 format_match_decimal_number_with_locale (char const *text, GOFormatFamily *family,
932 GString const *curr, GString const *thousand,
933 GString const *decimal)
935 gboolean par_open = FALSE;
936 gboolean par_close = FALSE;
937 gboolean has_curr = FALSE;
938 gboolean has_percent = FALSE;
939 char sign = 0;
940 GString *numstr = g_string_sized_new (20);
941 gboolean last_was_digit = FALSE;
942 gboolean allow1000 = (thousand != NULL) && (thousand->len != 0);
944 g_return_val_if_fail (curr != NULL, NULL);
945 g_return_val_if_fail (decimal != NULL, NULL);
947 while (*text) {
948 gunichar uc = g_utf8_get_char (text);
950 if (!has_curr && strncmp (curr->str, text, curr->len) == 0) {
951 has_curr = TRUE;
952 text += curr->len;
953 continue;
956 if (g_unichar_isspace (uc)) {
957 text = g_utf8_next_char (text);
958 continue;
961 if (!sign) {
962 DO_SIGN (sign, uc, {
963 g_string_append_c (numstr, sign);
964 text = g_utf8_next_char (text);
965 continue;
969 if (!par_open && !sign && uc == '(') {
970 sign = '-';
971 g_string_append_c (numstr, sign);
972 par_open = TRUE;
973 text++;
974 continue;
977 break;
980 while (*text) {
981 char c = *text;
983 if (last_was_digit &&
984 allow1000 &&
985 strncmp (thousand->str, text, thousand->len) == 0 &&
986 g_ascii_isdigit (text[thousand->len]) &&
987 g_ascii_isdigit (text[thousand->len + 1]) &&
988 g_ascii_isdigit (text[thousand->len + 2])) {
989 text += thousand->len;
990 continue;
993 if (strncmp (decimal->str, text, decimal->len) == 0) {
994 GString const *local_decimal = go_locale_get_decimal ();
995 g_string_append_len (numstr, local_decimal->str, local_decimal->len);
996 text += decimal->len;
997 allow1000 = FALSE;
998 continue;
1001 if (g_ascii_isdigit (c)) {
1002 g_string_append_c (numstr, c);
1003 text++;
1004 last_was_digit = TRUE;
1005 continue;
1007 last_was_digit = FALSE;
1009 if (c == 'e' || c == 'E') {
1010 char esign = 0;
1011 gunichar uc;
1014 * Pretend to have seen a sign so we don't accept
1015 * a "-" at the end.
1017 if (!sign)
1018 sign = '+';
1019 allow1000 = FALSE;
1021 g_string_append_c (numstr, c);
1022 text++;
1024 uc = g_utf8_get_char (text);
1025 DO_SIGN (esign, uc, {
1026 text = g_utf8_next_char (text);
1027 g_string_append_c (numstr, esign);
1030 continue;
1033 break;
1036 while (*text) {
1037 gunichar uc = g_utf8_get_char (text);
1039 if (!has_curr && strncmp (curr->str, text, curr->len) == 0) {
1040 has_curr = TRUE;
1041 text += curr->len;
1042 continue;
1045 if (g_unichar_isspace (uc)) {
1046 text = g_utf8_next_char (text);
1047 continue;
1050 if (!sign) {
1051 DO_SIGN (sign, uc, {
1052 g_string_prepend_c (numstr, sign);
1053 text = g_utf8_next_char (text);
1054 continue;
1058 if (!par_close && par_open && uc == ')') {
1059 par_close = TRUE;
1060 text++;
1061 continue;
1064 if (!has_percent && uc == '%') {
1065 has_percent = TRUE;
1066 text++;
1067 continue;
1070 break;
1073 if (*text ||
1074 numstr->len == 0 ||
1075 par_open != par_close ||
1076 (has_percent && (par_open || has_curr))) {
1077 g_string_free (numstr, TRUE);
1078 return NULL;
1079 } else {
1080 gnm_float f;
1081 char *end;
1082 gboolean bad;
1084 f = gnm_utf8_strto (numstr->str, &end);
1085 bad = *end || errno == ERANGE;
1086 g_string_free (numstr, TRUE);
1088 if (bad)
1089 return NULL;
1091 if (par_open)
1092 *family = GO_FORMAT_ACCOUNTING;
1093 else if (has_curr)
1094 *family = GO_FORMAT_CURRENCY;
1095 else if (has_percent)
1096 *family = GO_FORMAT_PERCENTAGE;
1097 else
1098 *family = GO_FORMAT_GENERAL;
1100 if (has_percent)
1101 f /= 100;
1103 return value_new_float (f);
1107 #undef DO_SIGN
1108 #undef SKIP_SPACES
1109 #undef SKIP_DIGITS
1111 static void
1112 set_money_format (GnmValue *v, const char *fmttxt)
1114 gnm_float f = value_get_as_float (v);
1116 if (fmttxt) {
1117 GOFormat *fmt = go_format_new_from_XL (fmttxt);
1118 value_set_fmt (v, fmt);
1119 go_format_unref (fmt);
1120 } else
1121 value_set_fmt (v, go_format_default_money ());
1123 if (f != gnm_floor (f)) {
1124 int i;
1125 for (i = 0; i < 2; i++) {
1126 GOFormat *fmt =
1127 go_format_inc_precision (VALUE_FMT (v));
1128 value_set_fmt (v, fmt);
1129 go_format_unref (fmt);
/*
 * Major alternate currencies to try after the locale's currency.
 * We do not want three-letter currency codes in here.
 *
 * Each entry pairs the symbol looked for in the text with the format
 * string applied to a value parsed with that symbol.
 */
static const struct {
	const char *sym;	/* currency symbol as typed */
	const char *fmt;	/* format string for matching values */
} alternate_currencies[] = {
	{ "€", "[$€-2]0" },
	{ "£", "£0" },
	{ "¥", "¥0" },
	{ "$", "$0" }
};
1148 static GnmValue *
1149 format_match_decimal_number (char const *text, GOFormatFamily *family,
1150 gboolean try_alternates)
1152 GString const *curr = go_locale_get_currency (NULL, NULL);
1153 GString const *thousand = go_locale_get_thousand ();
1154 GString const *decimal = go_locale_get_decimal ();
1155 GnmValue *v;
1156 unsigned ui;
1158 v = format_match_decimal_number_with_locale (text, family, curr, thousand, decimal);
1159 for (ui = 0;
1160 try_alternates && v == NULL && ui < G_N_ELEMENTS (alternate_currencies);
1161 ui++) {
1162 const char *sym = alternate_currencies[ui].sym;
1163 if (strstr (text, sym) == 0)
1164 continue;
1165 else {
1166 GString *altcurr = g_string_new (sym);
1167 v = format_match_decimal_number_with_locale
1168 (text, family, altcurr, thousand, decimal);
1169 g_string_free (altcurr, TRUE);
1170 if (v)
1171 set_money_format (v, alternate_currencies[ui].fmt);
1175 return v;
1179 * format_match:
1180 * @text: The text to parse
1181 * @cur_fmt: The current format for the value (potentially NULL)
1182 * @date_conv: optional date convention
1184 * Attempts to parse the supplied string to see if it matches a known value
1185 * format. The caller is responsible for releasing the resulting value.
1187 GnmValue *
1188 format_match (char const *text, GOFormat const *cur_fmt,
1189 GODateConventions const *date_conv)
1191 GOFormatFamily fam;
1192 GnmValue *v;
1193 int denlen;
1195 if (text[0] == '\0')
1196 return value_new_empty ();
1198 /* If it begins with a '\'' it is a string */
1199 if (text[0] == '\'')
1200 return value_new_string (text + 1);
1202 fam = cur_fmt ? go_format_get_family (cur_fmt) : GO_FORMAT_GENERAL;
1203 switch (fam) {
1204 case GO_FORMAT_TEXT:
1205 return value_new_string (text);
1207 case GO_FORMAT_NUMBER:
1208 case GO_FORMAT_CURRENCY:
1209 case GO_FORMAT_ACCOUNTING:
1210 case GO_FORMAT_PERCENTAGE:
1211 case GO_FORMAT_SCIENTIFIC:
1212 v = format_match_decimal_number (text, &fam, FALSE);
1213 if (!v)
1214 v = value_is_error (text);
1215 if (v)
1216 value_set_fmt (v, cur_fmt);
1217 return v;
1219 case GO_FORMAT_DATE: {
1220 gboolean month_before_day =
1221 gnm_format_month_before_day (cur_fmt, NULL) != 0;
1223 v = format_match_datetime (text, date_conv,
1224 month_before_day,
1225 FALSE,
1226 TRUE);
1227 if (!v)
1228 v = format_match_decimal_number (text, &fam, FALSE);
1229 if (!v)
1230 v = value_is_error (text);
1231 if (v)
1232 value_set_fmt (v, cur_fmt);
1233 return v;
1236 case GO_FORMAT_TIME: {
1237 gboolean month_before_day =
1238 gnm_format_month_before_day (cur_fmt, NULL) != 0;
1240 gboolean prefer_hour =
1241 gnm_format_has_hour (cur_fmt, NULL);
1243 v = format_match_datetime (text, date_conv,
1244 month_before_day,
1245 FALSE,
1246 FALSE);
1247 if (!v)
1248 v = format_match_time (text, TRUE, prefer_hour, FALSE);
1249 if (!v)
1250 v = format_match_decimal_number (text, &fam, FALSE);
1251 if (!v)
1252 v = value_is_error (text);
1253 if (v)
1254 value_set_fmt (v, cur_fmt);
1255 return v;
1258 case GO_FORMAT_FRACTION:
1259 v = format_match_fraction (text, &denlen, FALSE);
1260 if (!v)
1261 v = format_match_decimal_number (text, &fam, FALSE);
1262 if (!v)
1263 v = value_is_error (text);
1264 if (v)
1265 value_set_fmt (v, cur_fmt);
1266 return v;
1268 default:
1269 ; /* Nothing */
1272 /* Check basic types */
1273 v = format_match_simple (text);
1274 if (v != NULL)
1275 return v;
1277 v = format_match_decimal_number (text, &fam, TRUE);
1278 if (v) {
1279 switch (fam) {
1280 case GO_FORMAT_PERCENTAGE:
1281 value_set_fmt (v, go_format_default_percentage ());
1282 break;
1283 case GO_FORMAT_CURRENCY:
1284 if (!VALUE_FMT (v))
1285 set_money_format (v, NULL);
1286 break;
1287 case GO_FORMAT_ACCOUNTING:
1288 value_set_fmt (v, go_format_default_accounting ());
1289 break;
1290 default:
1291 ; /* Nothing */
1294 return v;
1297 v = format_match_datetime (text, date_conv,
1298 go_locale_month_before_day () != 0,
1299 TRUE,
1300 FALSE);
1301 if (v)
1302 return v;
1304 v = format_match_time (text, TRUE, TRUE, TRUE);
1305 if (v)
1306 return v;
1308 v = format_match_fraction (text, &denlen, TRUE);
1309 if (v) {
1310 char fmtstr[20];
1311 char const *qqq = "?????" + 5;
1312 GOFormat *fmt;
1314 denlen = MIN (denlen, 5);
1315 sprintf (fmtstr, "# %s/%s", qqq - denlen, qqq - denlen);
1316 fmt = go_format_new_from_XL (fmtstr);
1317 value_set_fmt (v, fmt);
1318 go_format_unref (fmt);
1319 return v;
1322 return NULL;
1326 * format_match_number:
1327 * @text: The text to parse
1328 * @cur_fmt: The current format for the value (potentially NULL)
1329 * @date_conv: optional date convention
1331 * Attempts to parse the supplied string to see if it matches a known value format.
1332 * Will eventually use the current cell format in preference to canned formats.
1333 * If @format is supplied it will get a copy of the matching format with no
1334 * additional references. The caller is responsible for releasing the
1335 * resulting value. Will ONLY return numbers.
1337 GnmValue *
1338 format_match_number (char const *text, GOFormat const *cur_fmt,
1339 GODateConventions const *date_conv)
1341 GnmValue *res = format_match (text, cur_fmt, date_conv);
1343 if (res != NULL) {
1344 if (VALUE_IS_NUMBER (res))
1345 return res;
1346 value_release (res);
1348 return NULL;