src/stf-parse.c

   1 /*
   2  * stf-parse.c : Structured Text Format parser. (STF)
   3  *               A general purpose engine for parsing data
   4  *               in CSV and Fixed width format.
   5  *
   6  *
   7  * Copyright (C) Almer. S. Tigelaar.
   8  * EMail: almer1@dds.nl or almer-t@bigfoot.com
   9  *
  10  * Copyright (C) 2003 Andreas J. Guelzow <aguelzow@taliesin.ca>
  11  * Copyright (C) 2003,2008-2009 Morten Welinder <terra@gnome.org>
  12  *
  13  * This program is free software; you can redistribute it and/or modify
  14  * it under the terms of the GNU General Public License as published by
  15  * the Free Software Foundation; either version 2 of the License, or
  16  * (at your option) any later version.
  17  *
  18  * This program is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21  * GNU General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU General Public License
  24  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  25  */
  26
  27 #include <gnumeric-config.h>
  28 #include <glib/gi18n-lib.h>
  29 #include <gnumeric.h>
  30 #include <stf-parse.h>
  31 #include <stf-export.h>
  32
  33 #include <workbook.h>
  34 #include <cell.h>
  35 #include <sheet.h>
  36 #include <expr.h>
  37 #include <clipboard.h>
  38 #include <sheet-style.h>
  39 #include <value.h>
  40 #include <mstyle.h>
  41 #include <number-match.h>
  42 #include <gutils.h>
  43 #include <parse-util.h>
  44 #include <number-match.h>
  45 #include <gnm-format.h>
  46 #include <ranges.h>
  47 #include <goffice/goffice.h>
  48
  49 #include <stdlib.h>
  50 #include <locale.h>
  51 #include <string.h>
  52
  53 #define SETUP_LOCALE_SWITCH char *oldlocale = NULL
  54
  55 #define START_LOCALE_SWITCH if (parseoptions->locale) {\
  56 oldlocale = g_strdup(go_setlocale (LC_ALL, NULL)); \
  57 go_setlocale(LC_ALL, parseoptions->locale);}
  58
  59 #define END_LOCALE_SWITCH if (oldlocale) {\
  60 go_setlocale(LC_ALL, oldlocale);\
  61 g_free (oldlocale);}
  62
  63 /* Source_t struct, used for interchanging parsing information between the low level parse functions */
  64 typedef struct {
  65         GStringChunk *chunk;
  66         char const *position;  /* Indicates the current position within data */
  67
  68         /* Used internally for fixed width parsing */
  69         int splitpos;          /* Indicates current position in splitpositions array */
  70         int linepos;           /* Position on the current line */
  71 } Source_t;
  72
  73 /* Struct used for autodiscovery */
  74 typedef struct {
  75         int start;
  76         int stop;
  77 } AutoDiscovery_t;
  78
  79 /*
  80  * Some silly dude make the length field an unsigned int.  C just does
  81  * not deal very well with that.
  82  */
  83 static inline int
  84 my_garray_len (GArray const *a)
  85 {
  86         return (int)a->len;
  87 }
  88
  89 static char *
  90 my_utf8_strchr (const char *p, gunichar uc)
  91 {
  92         return uc < 0x7f ? strchr (p, uc) : g_utf8_strchr (p, -1, uc);
  93 }
  94
  95 static int
  96 compare_terminator (char const *s, StfParseOptions_t *parseoptions)
  97 {
  98         guchar const *us = (guchar const *)s;
  99         GSList *l;
 100
 101         if (*us > parseoptions->compiled_terminator.max ||
 102             *us < parseoptions->compiled_terminator.min)
 103                 return 0;
 104
 105         for (l = parseoptions->terminator; l; l = l->next) {
 106                 char const *term = l->data;
 107                 char const *d = s;
 108
 109                 while (*term) {
 110                         if (*d != *term)
 111                                 goto next;
 112                         term++;
 113                         d++;
 114                 }
 115                 return d - s;
 116
 117         next:
 118                 ;
 119         }
 120         return 0;
 121 }
 122
 123
 124 /*******************************************************************************************************
 125  * STF PARSE OPTIONS : StfParseOptions related
 126  *******************************************************************************************************/
 127
 128 static void
 129 gnm_g_string_free (GString *s)
 130 {
 131         if (s) g_string_free (s, TRUE);
 132 }
 133
 134
 135 /**
 136  * stf_parse_options_new:
 137  *
 138  * This will return a new StfParseOptions_t struct.
 139  * The struct should, after being used, freed with stf_parse_options_free.
 140  **/
 141 static StfParseOptions_t *
 142 stf_parse_options_new (void)
 143 {
 144         StfParseOptions_t* parseoptions = g_new0 (StfParseOptions_t, 1);
 145
 146         parseoptions->parsetype   = PARSE_TYPE_NOTSET;
 147
 148         parseoptions->terminator  = NULL;
 149         stf_parse_options_add_line_terminator (parseoptions, "\r\n");
 150         stf_parse_options_add_line_terminator (parseoptions, "\n");
 151         stf_parse_options_add_line_terminator (parseoptions, "\r");
 152
 153         parseoptions->trim_spaces = (TRIM_TYPE_RIGHT | TRIM_TYPE_LEFT);
 154         parseoptions->locale = NULL;
 155
 156         parseoptions->splitpositions = NULL;
 157         stf_parse_options_fixed_splitpositions_clear (parseoptions);
 158
 159         parseoptions->stringindicator = '"';
 160         parseoptions->indicator_2x_is_single = TRUE;
 161         parseoptions->sep.duplicates = FALSE;
 162         parseoptions->trim_seps = FALSE;
 163
 164         parseoptions->sep.str = NULL;
 165         parseoptions->sep.chr = NULL;
 166
 167         parseoptions->col_autofit_array = NULL;
 168         parseoptions->col_import_array = NULL;
 169         parseoptions->col_import_array_len = 0;
 170         parseoptions->formats = g_ptr_array_new_with_free_func ((GDestroyNotify)go_format_unref);
 171         parseoptions->formats_decimal = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 172         parseoptions->formats_thousand = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 173         parseoptions->formats_curr = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 174
 175         parseoptions->cols_exceeded = FALSE;
 176         parseoptions->rows_exceeded = FALSE;
 177         parseoptions->ref_count = 1;
 178
 179         return parseoptions;
 180 }
 181
 182 /**
 183  * stf_parse_options_free:
 184  *
 185  * will free @parseoptions, note that this will not free the splitpositions
 186  * member (GArray) of the struct, the caller is responsible for that.
 187  **/
 188 void
 189 stf_parse_options_free (StfParseOptions_t *parseoptions)
 190 {
 191         g_return_if_fail (parseoptions != NULL);
 192
 193         if (parseoptions->ref_count-- > 1)
 194                 return;
 195
 196         g_free (parseoptions->col_import_array);
 197         g_free (parseoptions->col_autofit_array);
 198         g_free (parseoptions->locale);
 199         g_free (parseoptions->sep.chr);
 200
 201         if (parseoptions->sep.str) {
 202                 GSList *l;
 203
 204                 for (l = parseoptions->sep.str; l != NULL; l = l->next)
 205                         g_free ((char *) l->data);
 206                 g_slist_free (parseoptions->sep.str);
 207         }
 208
 209         g_array_free (parseoptions->splitpositions, TRUE);
 210
 211         stf_parse_options_clear_line_terminator (parseoptions);
 212
 213         g_ptr_array_free (parseoptions->formats, TRUE);
 214         g_ptr_array_free (parseoptions->formats_decimal, TRUE);
 215         g_ptr_array_free (parseoptions->formats_thousand, TRUE);
 216         g_ptr_array_free (parseoptions->formats_curr, TRUE);
 217
 218         g_free (parseoptions);
 219 }
 220
 221 static StfParseOptions_t *
 222 stf_parse_options_ref (StfParseOptions_t *parseoptions)
 223 {
 224         parseoptions->ref_count++;
 225         return parseoptions;
 226 }
 227
 228 GType
 229 stf_parse_options_get_type (void)
 230 {
 231         static GType t = 0;
 232
 233         if (t == 0) {
 234                 t = g_boxed_type_register_static ("StfParseOptions_t",
 235                          (GBoxedCopyFunc)stf_parse_options_ref,
 236                          (GBoxedFreeFunc)stf_parse_options_free);
 237         }
 238         return t;
 239 }
 240
 241 void
 242 stf_parse_options_set_type (StfParseOptions_t *parseoptions, StfParseType_t const parsetype)
 243 {
 244         g_return_if_fail (parseoptions != NULL);
 245         g_return_if_fail (parsetype == PARSE_TYPE_CSV || parsetype == PARSE_TYPE_FIXED);
 246
 247         parseoptions->parsetype = parsetype;
 248 }
 249
 250 static gint
 251 long_string_first (gchar const *a, gchar const *b)
 252 {
 253         /* This actually is UTF-8 safe.  */
 254         return strlen (b) - strlen (a);
 255 }
 256
 257 static void
 258 compile_terminators (StfParseOptions_t *parseoptions)
 259 {
 260         GSList *l;
 261
 262         parseoptions->terminator =
 263                 g_slist_sort (parseoptions->terminator,
 264                               (GCompareFunc)long_string_first);
 265         parseoptions->compiled_terminator.min = 255;
 266         parseoptions->compiled_terminator.max = 0;
 267         for (l = parseoptions->terminator; l; l = l->next) {
 268                 const guchar *term = l->data;
 269                 parseoptions->compiled_terminator.min =
 270                         MIN (parseoptions->compiled_terminator.min, *term);
 271                 parseoptions->compiled_terminator.max =
 272                         MAX (parseoptions->compiled_terminator.max, *term);
 273         }
 274 }
 275
 276 /**
 277  * stf_parse_options_add_line_terminator:
 278  *
 279  * This will add to the line terminators, in both the Fixed width and CSV delimited importers
 280  * this indicates the end of a row.
 281  *
 282  **/
 283 void
 284 stf_parse_options_add_line_terminator (StfParseOptions_t *parseoptions, char const *terminator)
 285 {
 286         g_return_if_fail (parseoptions != NULL);
 287         g_return_if_fail (terminator != NULL && *terminator != 0);
 288
 289         GO_SLIST_PREPEND (parseoptions->terminator, g_strdup (terminator));
 290         compile_terminators (parseoptions);
 291 }
 292
 293 /**
 294  * stf_parse_options_clear_line_terminator:
 295  *
 296  * This will clear the line terminator, in both the Fixed width and CSV delimited importers
 297  * this indicates the end of a row.
 298  *
 299  **/
 300 void
 301 stf_parse_options_clear_line_terminator (StfParseOptions_t *parseoptions)
 302 {
 303         g_return_if_fail (parseoptions != NULL);
 304
 305         g_slist_free_full (parseoptions->terminator, g_free);
 306         parseoptions->terminator = NULL;
 307         compile_terminators (parseoptions);
 308 }
 309
 310 /**
 311  * stf_parse_options_set_trim_spaces:
 312  *
 313  * If enabled will trim spaces in every parsed field on left and/or right
 314  * sides.
 315  **/
 316 void
 317 stf_parse_options_set_trim_spaces (StfParseOptions_t *parseoptions, StfTrimType_t const trim_spaces)
 318 {
 319         g_return_if_fail (parseoptions != NULL);
 320
 321         parseoptions->trim_spaces = trim_spaces;
 322 }
 323
 324 /**
 325  * stf_parse_options_csv_set_separators:
 326  * @parseoptions: #StfParseOptions_t
 327  * @character:
 328  * @seps: (element-type utf8): the separators to be used
 329  *
 330  * A copy is made of the parameters.
 331  **/
 332 void
 333 stf_parse_options_csv_set_separators (StfParseOptions_t *parseoptions,
 334                                       char const *character,
 335                                       GSList const *seps)
 336 {
 337         g_return_if_fail (parseoptions != NULL);
 338
 339         g_free (parseoptions->sep.chr);
 340         parseoptions->sep.chr = g_strdup (character);
 341
 342         g_slist_free_full (parseoptions->sep.str, g_free);
 343         parseoptions->sep.str =
 344                 g_slist_copy_deep ((GSList *)seps, (GCopyFunc)g_strdup, NULL);
 345 }
 346
 347 void
 348 stf_parse_options_csv_set_stringindicator (StfParseOptions_t *parseoptions, gunichar const stringindicator)
 349 {
 350         g_return_if_fail (parseoptions != NULL);
 351
 352         parseoptions->stringindicator = stringindicator;
 353 }
 354
 355 /**
 356  * stf_parse_options_csv_set_indicator_2x_is_single:
 357  * @indic_2x: a boolean value indicating whether we want to see two
 358  *              adjacent string indicators as a single string indicator
 359  *              that is part of the cell, rather than a terminator.
 360  **/
 361 void
 362 stf_parse_options_csv_set_indicator_2x_is_single (StfParseOptions_t *parseoptions,
 363                                                   gboolean const indic_2x)
 364 {
 365         g_return_if_fail (parseoptions != NULL);
 366
 367         parseoptions->indicator_2x_is_single = indic_2x;
 368 }
 369
 370 /**
 371  * stf_parse_options_csv_set_duplicates:
 372  * @parseoptions:
 373  * @duplicates: a boolean value indicating whether we want to see two
 374  *               separators right behind each other as one
 375  **/
 376 void
 377 stf_parse_options_csv_set_duplicates (StfParseOptions_t *parseoptions, gboolean const duplicates)
 378 {
 379         g_return_if_fail (parseoptions != NULL);
 380
 381         parseoptions->sep.duplicates = duplicates;
 382 }
 383
 384 /**
 385  * stf_parse_options_csv_set_trim_seps:
 386  * @trim_seps: a boolean value indicating whether we want to ignore
 387  *               separators at the beginning of lines
 388  **/
 389 void
 390 stf_parse_options_csv_set_trim_seps (StfParseOptions_t *parseoptions, gboolean const trim_seps)
 391 {
 392         g_return_if_fail (parseoptions != NULL);
 393
 394         parseoptions->trim_seps = trim_seps;
 395 }
 396
 397 /**
 398  * stf_parse_options_fixed_splitpositions_clear:
 399  *
 400  * This will clear the splitpositions (== points on which a line is split)
 401  **/
 402 void
 403 stf_parse_options_fixed_splitpositions_clear (StfParseOptions_t *parseoptions)
 404 {
 405         int minus_one = -1;
 406         g_return_if_fail (parseoptions != NULL);
 407
 408         if (parseoptions->splitpositions)
 409                 g_array_free (parseoptions->splitpositions, TRUE);
 410         parseoptions->splitpositions = g_array_new (FALSE, FALSE, sizeof (int));
 411
 412         g_array_append_val (parseoptions->splitpositions, minus_one);
 413 }
 414
 415 /**
 416  * stf_parse_options_fixed_splitpositions_add:
 417  *
 418  * @position will be added to the splitpositions.
 419  **/
 420 void
 421 stf_parse_options_fixed_splitpositions_add (StfParseOptions_t *parseoptions, int position)
 422 {
 423         unsigned int ui;
 424
 425         g_return_if_fail (parseoptions != NULL);
 426         g_return_if_fail (position >= 0);
 427
 428         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 429                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 430                 if (position == here)
 431                         return;
 432                 if (position < here)
 433                         break;
 434         }
 435
 436         g_array_insert_val (parseoptions->splitpositions, ui, position);
 437 }
 438
 439 void
 440 stf_parse_options_fixed_splitpositions_remove (StfParseOptions_t *parseoptions, int position)
 441 {
 442         unsigned int ui;
 443
 444         g_return_if_fail (parseoptions != NULL);
 445         g_return_if_fail (position >= 0);
 446
 447         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 448                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 449                 if (position == here)
 450                         g_array_remove_index (parseoptions->splitpositions, ui);
 451                 if (position <= here)
 452                         return;
 453         }
 454 }
 455
 456 int
 457 stf_parse_options_fixed_splitpositions_count (StfParseOptions_t *parseoptions)
 458 {
 459         return parseoptions->splitpositions->len;
 460 }
 461
 462 int
 463 stf_parse_options_fixed_splitpositions_nth (StfParseOptions_t *parseoptions, int n)
 464 {
 465         return g_array_index (parseoptions->splitpositions, int, n);
 466 }
 467
 468
 469 /**
 470  * stf_parse_options_valid:
 471  * @parseoptions: an import options struct
 472  *
 473  * Checks if @parseoptions is correctly filled
 474  *
 475  * returns : TRUE if it is correctly filled, FALSE otherwise.
 476  **/
 477 static gboolean
 478 stf_parse_options_valid (StfParseOptions_t *parseoptions)
 479 {
 480         g_return_val_if_fail (parseoptions != NULL, FALSE);
 481
 482         if (parseoptions->parsetype == PARSE_TYPE_FIXED) {
 483                 if (!parseoptions->splitpositions) {
 484                         g_warning ("STF: No splitpositions in struct");
 485                         return FALSE;
 486                 }
 487         }
 488
 489         return TRUE;
 490 }
 491
 492 /*******************************************************************************************************
 493  * STF PARSE : The actual routines that do the 'trick'
 494  *******************************************************************************************************/
 495
 496 static void
 497 trim_spaces_inplace (char *field, StfParseOptions_t const *parseoptions)
 498 {
 499         if (!field) return;
 500
 501         if (parseoptions->trim_spaces & TRIM_TYPE_LEFT) {
 502                 char *s = field;
 503
 504                 while (g_unichar_isspace (g_utf8_get_char (s)))
 505                         s = g_utf8_next_char (s);
 506
 507                 if (s != field)
 508                         memmove (field, s, 1 + strlen (s));
 509         }
 510
 511         if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 512                 char *s = field + strlen (field);
 513
 514                 while (field != s) {
 515                         s = g_utf8_prev_char (s);
 516                         if (!g_unichar_isspace (g_utf8_get_char (s)))
 517                                 break;
 518                         *s = 0;
 519                 }
 520         }
 521 }
 522
 523 /**
 524  * stf_parse_csv_is_separator:
 525  *
 526  * returns NULL if @character is not a separator, a pointer to the character
 527  * after the separator otherwise.
 528  **/
 529 static char const *
 530 stf_parse_csv_is_separator (char const *character, char const *chr, GSList const *str)
 531 {
 532         g_return_val_if_fail (character != NULL, NULL);
 533
 534         if (*character == 0)
 535                 return NULL;
 536
 537         if (str) {
 538                 GSList const *l;
 539
 540                 for (l = str; l != NULL; l = l->next) {
 541                         char const *s = l->data;
 542                         char const *r;
 543                         glong cnt;
 544                         glong const len = g_utf8_strlen (s, -1);
 545
 546                         /* Don't compare past the end of the buffer! */
 547                         for (r = character, cnt = 0; cnt < len; cnt++, r = g_utf8_next_char (r))
 548                                 if (*r == '\0')
 549                                         break;
 550
 551                         if ((cnt == len) && (memcmp (character, s, len) == 0))
 552                                 return g_utf8_offset_to_pointer (character, len);
 553                 }
 554         }
 555
 556         if (chr && my_utf8_strchr (chr, g_utf8_get_char (character)))
 557                 return g_utf8_next_char(character);
 558
 559         return NULL;
 560 }
 561
 562 /*
 563  * stf_parse_eat_separators:
 564  *
 565  * skip over leading separators
 566  *
 567  */
 568
 569 static void
 570 stf_parse_eat_separators (Source_t *src, StfParseOptions_t *parseoptions)
 571 {
 572         char const *cur, *next;
 573
 574         g_return_if_fail (src != NULL);
 575         g_return_if_fail (parseoptions != NULL);
 576
 577         cur = src->position;
 578
 579         if (*cur == '\0' || compare_terminator (cur, parseoptions))
 580                 return;
 581         while ((next = stf_parse_csv_is_separator (cur, parseoptions->sep.chr, parseoptions->sep.str)))
 582                 cur = next;
 583         src->position = cur;
 584         return;
 585 }
 586
 587
 588 typedef enum {
 589         STF_CELL_ERROR,
 590         STF_CELL_EOF,
 591         STF_CELL_EOL,
 592         STF_CELL_FIELD_NO_SEP,
 593         STF_CELL_FIELD_SEP
 594 } StfParseCellRes;
 595
 596 static StfParseCellRes
 597 stf_parse_csv_cell (GString *text, Source_t *src, StfParseOptions_t *parseoptions)
 598 {
 599         char const *cur;
 600         gboolean saw_sep = FALSE;
 601
 602         g_return_val_if_fail (src != NULL, STF_CELL_ERROR);
 603         g_return_val_if_fail (parseoptions != NULL, STF_CELL_ERROR);
 604
 605         cur = src->position;
 606         g_return_val_if_fail (cur != NULL, STF_CELL_ERROR);
 607
 608         /* Skip whitespace, but stop at line terminators.  */
 609         while (1) {
 610                 int term_len;
 611
 612                 if (*cur == 0) {
 613                         src->position = cur;
 614                         return STF_CELL_EOF;
 615                 }
 616
 617                 term_len = compare_terminator (cur, parseoptions);
 618                 if (term_len) {
 619                         src->position = cur + term_len;
 620                         return STF_CELL_EOL;
 621                 }
 622
 623                 if ((parseoptions->trim_spaces & TRIM_TYPE_LEFT) == 0)
 624                         break;
 625
 626                 if (stf_parse_csv_is_separator (cur, parseoptions->sep.chr,
 627                                                 parseoptions->sep.str))
 628                         break;
 629
 630                 if (!g_unichar_isspace (g_utf8_get_char (cur)))
 631                         break;
 632                 cur = g_utf8_next_char (cur);
 633         }
 634
 635         if (parseoptions->stringindicator != 0 &&
 636             g_utf8_get_char (cur) == parseoptions->stringindicator) {
 637                 cur = g_utf8_next_char (cur);
 638                 while (*cur) {
 639                         gunichar uc = g_utf8_get_char (cur);
 640                         cur = g_utf8_next_char (cur);
 641
 642                         if (uc == parseoptions->stringindicator) {
 643                                 if (parseoptions->indicator_2x_is_single &&
 644                                     g_utf8_get_char (cur) == parseoptions->stringindicator)
 645                                         cur = g_utf8_next_char (cur);
 646                                 else {
 647                                         /* "field content"dropped-garbage,  */
 648                                         while (*cur && !compare_terminator (cur, parseoptions)) {
 649                                                 char const *post = stf_parse_csv_is_separator
 650                                                         (cur, parseoptions->sep.chr, parseoptions->sep.str);
 651                                                 if (post) {
 652                                                         cur = post;
 653                                                         saw_sep = TRUE;
 654                                                         break;
 655                                                 }
 656                                                 cur = g_utf8_next_char (cur);
 657                                         }
 658                                         break;
 659                                 }
 660                         }
 661
 662                         g_string_append_unichar (text, uc);
 663                 }
 664
 665                 /* We silently allow a missing terminating quote.  */
 666         } else {
 667                 /* Unquoted field.  */
 668
 669                 while (*cur && !compare_terminator (cur, parseoptions)) {
 670
 671                         char const *post = stf_parse_csv_is_separator
 672                                 (cur, parseoptions->sep.chr, parseoptions->sep.str);
 673                         if (post) {
 674                                 cur = post;
 675                                 saw_sep = TRUE;
 676                                 break;
 677                         }
 678
 679                         g_string_append_unichar (text, g_utf8_get_char (cur));
 680                         cur = g_utf8_next_char (cur);
 681                 }
 682
 683                 if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 684                         while (text->len) {
 685                                 const char *last = g_utf8_prev_char (text->str + text->len);
 686                                 if (!g_unichar_isspace (g_utf8_get_char (last)))
 687                                         break;
 688                                 g_string_truncate (text, last - text->str);
 689                         }
 690                 }
 691         }
 692
 693         src->position = cur;
 694
 695         if (saw_sep && parseoptions->sep.duplicates)
 696                 stf_parse_eat_separators (src, parseoptions);
 697
 698         return saw_sep ? STF_CELL_FIELD_SEP : STF_CELL_FIELD_NO_SEP;
 699 }
 700
 701 /**
 702  * stf_parse_csv_line:
 703  *
 704  * This will parse one line from the current @src->position.
 705  * NOTE: The calling routine is responsible for freeing the result.
 706  *
 707  * returns : a GPtrArray of char*'s
 708  **/
 709 static GPtrArray *
 710 stf_parse_csv_line (Source_t *src, StfParseOptions_t *parseoptions)
 711 {
 712         GPtrArray *line;
 713         gboolean cont = FALSE;
 714         GString *text;
 715
 716         g_return_val_if_fail (src != NULL, NULL);
 717         g_return_val_if_fail (parseoptions != NULL, NULL);
 718
 719         line = g_ptr_array_new ();
 720         if (parseoptions->trim_seps)
 721                 stf_parse_eat_separators (src, parseoptions);
 722
 723         text = g_string_sized_new (30);
 724
 725         while (1) {
 726                 char *ctext;
 727                 StfParseCellRes res =
 728                         stf_parse_csv_cell (text, src, parseoptions);
 729                 trim_spaces_inplace (text->str, parseoptions);
 730                 ctext = g_string_chunk_insert_len (src->chunk,
 731                                                    text->str, text->len);
 732                 g_string_truncate (text, 0);
 733
 734                 switch (res) {
 735                 case STF_CELL_FIELD_NO_SEP:
 736                         g_ptr_array_add (line, ctext);
 737                         cont = FALSE;
 738                         break;
 739
 740                 case STF_CELL_FIELD_SEP:
 741                         g_ptr_array_add (line, ctext);
 742                         cont = TRUE;  /* Make sure we see one more field.  */
 743                         break;
 744
 745                 default:
 746                         if (cont)
 747                                 g_ptr_array_add (line, ctext);
 748                         g_string_free (text, TRUE);
 749                         return line;
 750                 }
 751         }
 752 }
 753
 754 /**
 755  * stf_parse_fixed_cell:
 756  *
 757  * returns a pointer to the parsed cell contents.
 758  **/
 759 static char *
 760 stf_parse_fixed_cell (Source_t *src, StfParseOptions_t *parseoptions)
 761 {
 762         char *res;
 763         char const *cur;
 764         int splitval;
 765
 766         g_return_val_if_fail (src != NULL, NULL);
 767         g_return_val_if_fail (parseoptions != NULL, NULL);
 768
 769         cur = src->position;
 770
 771         if (src->splitpos < my_garray_len (parseoptions->splitpositions))
 772                 splitval = (int) g_array_index (parseoptions->splitpositions, int, src->splitpos);
 773         else
 774                 splitval = -1;
 775
 776         while (*cur != 0 && !compare_terminator (cur, parseoptions) && splitval != src->linepos) {
 777                 src->linepos++;
 778                 cur = g_utf8_next_char (cur);
 779         }
 780
 781         res = g_string_chunk_insert_len (src->chunk,
 782                                          src->position,
 783                                          cur - src->position);
 784
 785         src->position = cur;
 786
 787         return res;
 788 }
 789
 790 /**
 791  * stf_parse_fixed_line:
 792  *
 793  * This will parse one line from the current @src->position.
 794  * It will return a GPtrArray with the cell contents as strings.
 795
 796  * NOTE: The calling routine is responsible for freeing result.
 797  **/
 798 static GPtrArray *
 799 stf_parse_fixed_line (Source_t *src, StfParseOptions_t *parseoptions)
 800 {
 801         GPtrArray *line;
 802
 803         g_return_val_if_fail (src != NULL, NULL);
 804         g_return_val_if_fail (parseoptions != NULL, NULL);
 805
 806         src->linepos = 0;
 807         src->splitpos = 0;
 808
 809         line = g_ptr_array_new ();
 810         while (*src->position != '\0' && !compare_terminator (src->position, parseoptions)) {
 811                 char *field = stf_parse_fixed_cell (src, parseoptions);
 812
 813                 trim_spaces_inplace (field, parseoptions);
 814                 g_ptr_array_add (line, field);
 815
 816                 src->splitpos++;
 817         }
 818
 819         while (line->len < parseoptions->splitpositions->len)
 820                 g_ptr_array_add (line, g_strdup (""));
 821
 822         return line;
 823 }
 824
 825 /**
 826  * stf_parse_general_free: (skip)
 827  */
 828 void
 829 stf_parse_general_free (GPtrArray *lines)
 830 {
 831         unsigned lineno;
 832         for (lineno = 0; lineno < lines->len; lineno++) {
 833                 GPtrArray *line = g_ptr_array_index (lines, lineno);
 834                 /* Fields are not freed here.  */
 835                 if (line)
 836                         g_ptr_array_free (line, TRUE);
 837         }
 838         g_ptr_array_free (lines, TRUE);
 839 }
 840
 841
 842 /**
 843  * stf_parse_general: (skip)
 844  *
 845  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 846  * GPtrArray of strings.
 847  *
 848  * The caller must free this entire structure, for example by calling
 849  * stf_parse_general_free.
 850  **/
 851 GPtrArray *
 852 stf_parse_general (StfParseOptions_t *parseoptions,
 853                    GStringChunk *lines_chunk,
 854                    char const *data, char const *data_end)
 855 {
 856         GPtrArray *lines;
 857         Source_t src;
 858         int row;
 859         char const *valid_end = data_end;
 860
 861         g_return_val_if_fail (parseoptions != NULL, NULL);
 862         g_return_val_if_fail (data != NULL, NULL);
 863         g_return_val_if_fail (data_end != NULL, NULL);
 864         g_return_val_if_fail (stf_parse_options_valid (parseoptions), NULL);
 865         g_return_val_if_fail (g_utf8_validate (data, data_end-data, &valid_end), NULL);
 866
 867         src.chunk = lines_chunk;
 868         src.position = data;
 869         row = 0;
 870
 871         if ((data_end-data >= 3) && !strncmp(src.position, "\xEF\xBB\xBF", 3)) {
 872                 /* Skip over byte-order mark */
 873                 src.position += 3;
 874         }
 875
 876         lines = g_ptr_array_new ();
 877         while (*src.position != '\0' && src.position < data_end) {
 878                 GPtrArray *line;
 879
 880                 if (row == GNM_MAX_ROWS) {
 881                         parseoptions->rows_exceeded = TRUE;
 882                         break;
 883                 }
 884
 885                 line = parseoptions->parsetype == PARSE_TYPE_CSV
 886                         ? stf_parse_csv_line (&src, parseoptions)
 887                         : stf_parse_fixed_line (&src, parseoptions);
 888
 889                 g_ptr_array_add (lines, line);
 890                 if (parseoptions->parsetype != PARSE_TYPE_CSV)
 891                         src.position += compare_terminator (src.position, parseoptions);
 892                 row++;
 893         }
 894
 895         return lines;
 896 }
 897
 898 /**
 899  * stf_parse_lines: (skip)
 900  * @parseoptions: #StfParseOptions_t
 901  * @lines_chunk:
 902  * @data:
 903  * @maxlines:
 904  * @with_lineno:
 905  *
 906  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 907  * GPtrArray of strings.
 908  *
 909  * The caller must free this entire structure, for example by calling
 910  * stf_parse_general_free.
 911  **/
 912 GPtrArray *
 913 stf_parse_lines (StfParseOptions_t *parseoptions,
 914                  GStringChunk *lines_chunk,
 915                  char const *data,
 916                  int maxlines, gboolean with_lineno)
 917 {
 918         GPtrArray *lines;
 919         int lineno = 1;
 920
 921         g_return_val_if_fail (data != NULL, NULL);
 922
 923         lines = g_ptr_array_new ();
 924         while (*data) {
 925                 char const *data0 = data;
 926                 GPtrArray *line = g_ptr_array_new ();
 927
 928                 if (with_lineno) {
 929                         char buf[4 * sizeof (int)];
 930                         sprintf (buf, "%d", lineno);
 931                         g_ptr_array_add (line,
 932                                          g_string_chunk_insert (lines_chunk, buf));
 933                 }
 934
 935                 while (1) {
 936                         int termlen = compare_terminator (data, parseoptions);
 937                         if (termlen > 0 || *data == 0) {
 938                                 g_ptr_array_add (line,
 939                                                  g_string_chunk_insert_len (lines_chunk,
 940                                                                             data0,
 941                                                                             data - data0));
 942                                 data += termlen;
 943                                 break;
 944                         } else
 945                                 data = g_utf8_next_char (data);
 946                 }
 947
 948                 g_ptr_array_add (lines, line);
 949
 950                 lineno++;
 951                 if (lineno >= maxlines)
 952                         break;
 953         }
 954         return lines;
 955 }
 956
 957 char const *
 958 stf_parse_find_line (StfParseOptions_t *parseoptions,
 959                      char const *data,
 960                      int line)
 961 {
 962         while (line > 0) {
 963                 int termlen = compare_terminator (data, parseoptions);
 964                 if (termlen > 0) {
 965                         data += termlen;
 966                         line--;
 967                 } else if (*data == 0) {
 968                         return data;
 969                 } else {
 970                         data = g_utf8_next_char (data);
 971                 }
 972         }
 973         return data;
 974 }
 975
 976
 977 /**
 978  * stf_parse_options_fixed_autodiscover:
 979  * @parseoptions: a Parse options struct.
 980  * @data: The actual data.
 981  * @data_end: data end.
 982  *
 983  * Automatically try to discover columns in the text to be parsed.
 984  * We ignore empty lines (only containing parseoptions->terminator)
 985  *
 986  * FIXME: This is so extremely ugly that I am too tired to rewrite it right now.
 987  *        Think hard of a better more flexible solution...
 988  **/
 989 void
 990 stf_parse_options_fixed_autodiscover (StfParseOptions_t *parseoptions,
 991                                       char const *data, char const *data_end)
 992 {
 993         char const *iterator = data;
 994         GSList *list = NULL;
 995         GSList *list_start = NULL;
 996         int lines = 0;
 997         int effective_lines = 0;
 998         int max_line_length = 0;
 999         int *line_begin_hits = NULL;
1000         int *line_end_hits = NULL;
1001         int i;
1002
1003         stf_parse_options_fixed_splitpositions_clear (parseoptions);
1004
1005         /*
1006          * First take a look at all possible white space combinations
1007          */
1008         while (*iterator && iterator < data_end) {
1009                 gboolean begin_recorded = FALSE;
1010                 AutoDiscovery_t *disc = NULL;
1011                 int position = 0;
1012                 int termlen = 0;
1013
1014                 while (*iterator && (termlen = compare_terminator (iterator, parseoptions)) == 0) {
1015                         if (!begin_recorded && *iterator == ' ') {
1016                                 disc = g_new0 (AutoDiscovery_t, 1);
1017
1018                                 disc->start = position;
1019
1020                                 begin_recorded = TRUE;
1021                         } else if (begin_recorded && *iterator != ' ') {
1022                                 disc->stop = position;
1023                                 list = g_slist_prepend (list, disc);
1024
1025                                 begin_recorded = FALSE;
1026                                 disc = NULL;
1027                         }
1028
1029                         position++;
1030                         iterator++;
1031                 }
1032
1033                 if (position > max_line_length)
1034                         max_line_length = position;
1035
1036                 /*
1037                  * If there are excess spaces at the end of
1038                  * the line : ignore them
1039                  */
1040                 g_free (disc);
1041
1042                 /*
1043                  * Hop over the terminator
1044                  */
1045                 iterator += termlen;
1046
1047                 if (position != 0)
1048                         effective_lines++;
1049
1050                 lines++;
1051         }
1052
1053         list       = g_slist_reverse (list);
1054         list_start = list;
1055
1056         /*
1057          * Kewl stuff:
1058          * Look at the number of hits at each line position
1059          * if the number of hits equals the number of lines
1060          * we can be pretty sure this is the start or end
1061          * of a column, we filter out empty columns
1062          * later
1063          */
1064         line_begin_hits = g_new0 (int, max_line_length + 1);
1065         line_end_hits   = g_new0 (int, max_line_length + 1);
1066
1067         while (list) {
1068                 AutoDiscovery_t *disc = list->data;
1069
1070                 line_begin_hits[disc->start]++;
1071                 line_end_hits[disc->stop]++;
1072
1073                 g_free (disc);
1074
1075                 list = g_slist_next (list);
1076         }
1077         g_slist_free (list_start);
1078
1079         for (i = 0; i < max_line_length + 1; i++)
1080                 if (line_begin_hits[i] == effective_lines || line_end_hits[i] == effective_lines)
1081                         stf_parse_options_fixed_splitpositions_add (parseoptions, i);
1082
1083         /*
1084          * Do some corrections to the initial columns
1085          * detected here, we obviously don't need to
1086          * do this if there are no columns at all.
1087          */
1088         if (my_garray_len (parseoptions->splitpositions) > 0) {
1089                 /*
1090                  * Try to find columns that look like:
1091                  *
1092                  * Example     100
1093                  * Example2      9
1094                  *
1095                  * (In other words : Columns with left & right justification with
1096                  *  a minimum of 2 spaces in the middle)
1097                  * Split these columns in 2
1098                  */
1099
1100                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1101                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1102                         int end   = g_array_index (parseoptions->splitpositions, int, i + 1);
1103                         int num_spaces   = -1;
1104                         int spaces_start = 0;
1105                         gboolean right_aligned = TRUE;
1106                         gboolean left_aligned  = TRUE;
1107                         gboolean has_2_spaces  = TRUE;
1108
1109                         iterator = data;
1110                         lines = 0;
1111                         while (*iterator && iterator < data_end) {
1112                                 gboolean trigger = FALSE;
1113                                 gboolean space_trigger = FALSE;
1114                                 int pos = 0;
1115
1116                                 num_spaces   = -1;
1117                                 spaces_start = 0;
1118                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1119                                         if (pos == begin) {
1120                                                 if (*iterator == ' ')
1121                                                         left_aligned = FALSE;
1122
1123                                                 trigger = TRUE;
1124                                         } else if (pos == end - 1) {
1125                                                 if (*iterator == ' ')
1126                                                         right_aligned = FALSE;
1127
1128                                                 trigger = FALSE;
1129                                         }
1130
1131                                         if (trigger || pos == end - 1) {
1132                                                 if (!space_trigger && *iterator == ' ') {
1133                                                         space_trigger = TRUE;
1134                                                         spaces_start = pos;
1135                                                 } else if (space_trigger && *iterator != ' ') {
1136                                                         space_trigger = FALSE;
1137                                                         num_spaces = pos - spaces_start;
1138                                                 }
1139                                         }
1140
1141                                         iterator++;
1142                                         pos++;
1143                                 }
1144
1145                                 if (num_spaces < 2)
1146                                         has_2_spaces = FALSE;
1147
1148                                 if (*iterator)
1149                                         iterator++;
1150
1151                                 lines++;
1152                         }
1153
1154                         /*
1155                          * If this column meets all the criteria
1156                          * split it into two at the last measured
1157                          * spaces_start + num_spaces
1158                          */
1159                         if (has_2_spaces && right_aligned && left_aligned) {
1160                                 int val = (((spaces_start + num_spaces) - spaces_start) / 2) + spaces_start;
1161
1162                                 g_array_insert_val (parseoptions->splitpositions, i + 1, val);
1163
1164                                 /*
1165                                  * Skip over the inserted column
1166                                  */
1167                                 i++;
1168                         }
1169                 }
1170
1171                 /*
1172                  * Remove empty columns here if needed
1173                  */
1174                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1175                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1176                         int end = g_array_index (parseoptions->splitpositions, int, i + 1);
1177                         gboolean only_spaces = TRUE;
1178
1179                         iterator = data;
1180                         lines = 0;
1181                         while (*iterator && iterator < data_end) {
1182                                 gboolean trigger = FALSE;
1183                                 int pos = 0;
1184
1185                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1186                                         if (pos == begin)
1187                                                 trigger = TRUE;
1188                                         else if (pos == end)
1189                                                 trigger = FALSE;
1190
1191                                         if (trigger) {
1192                                                 if (*iterator != ' ')
1193                                                         only_spaces = FALSE;
1194                                         }
1195
1196                                         iterator++;
1197                                         pos++;
1198                                 }
1199
1200                                 if (*iterator)
1201                                         iterator++;
1202
1203                                 lines++;
1204                         }
1205
1206                         /*
1207                          * The column only contains spaces
1208                          * remove it
1209                          */
1210                         if (only_spaces) {
1211                                 g_array_remove_index (parseoptions->splitpositions, i);
1212
1213                                 /*
1214                                  * We HAVE to make sure that the next column (end) also
1215                                  * gets checked out. If we don't decrease "i" here, we
1216                                  * will skip over it as the indexes shift down after
1217                                  * the removal
1218                                  */
1219                                 i--;
1220                         }
1221                 }
1222         }
1223
1224         g_free (line_begin_hits);
1225         g_free (line_end_hits);
1226 }
1227
1228 /*******************************************************************************************************
1229  * STF PARSE HL: high-level functions that dump the raw data returned by the low-level parsing
1230  *               functions into something meaningful (== application specific)
1231  *******************************************************************************************************/
1232
1233 /*
1234  * This is more or less as gnm_cell_set_text, except...
1235  * 1. Unknown names are not allowed.
1236  * 2. Only '=' can start an expression.
1237  */
1238
1239 static void
1240 stf_cell_set_text (GnmCell *cell, char const *text)
1241 {
1242         GnmExprTop const *texpr;
1243         GnmValue *val;
1244         GOFormat const *fmt = gnm_style_get_format (gnm_cell_get_style (cell));
1245         const GODateConventions *date_conv = sheet_date_conv (cell->base.sheet);
1246
1247         if (!go_format_is_text (fmt) && *text == '=' && text[1] != 0) {
1248                 GnmExprParseFlags flags =
1249                         GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID;
1250                 const char *expr_start = text + 1;
1251                 GnmParsePos pos;
1252                 val = NULL;
1253                 parse_pos_init_cell (&pos, cell);
1254                 texpr = gnm_expr_parse_str (expr_start, &pos, flags,
1255                                             NULL, NULL);
1256         } else {
1257                 texpr = NULL;
1258                 val = format_match (text, fmt, date_conv);
1259         }
1260
1261         if (!val && !texpr)
1262                 val = value_new_string (text);
1263
1264         if (val)
1265                 gnm_cell_set_value (cell, val);
1266         else {
1267                 gnm_cell_set_expr (cell, texpr);
1268                 gnm_expr_top_unref (texpr);
1269         }
1270 }
1271
1272 static void
1273 stf_read_remember_settings (Workbook *book, StfParseOptions_t *po)
1274 {
1275         if (po->parsetype == PARSE_TYPE_CSV) {
1276                 GnmStfExport *stfe = gnm_stf_get_stfe (G_OBJECT (book));
1277                 char quote[6];
1278                 int length = g_unichar_to_utf8 (po->stringindicator, quote);
1279                 if (length > 5) {
1280                         quote[0] = '"';
1281                         quote[1] = '\0';
1282                 } else quote[length] = '\0';
1283
1284                 g_object_set (G_OBJECT (stfe), "separator", po->sep.chr, "quote", &quote, NULL);
1285
1286                 if ((po->terminator != NULL) &&  (po->terminator->data != NULL))
1287                         g_object_set (G_OBJECT (stfe), "eol", po->terminator->data, NULL);
1288         }
1289 }
1290
1291 gboolean
1292 stf_parse_sheet (StfParseOptions_t *parseoptions,
1293                  char const *data, char const *data_end,
1294                  Sheet *sheet, int start_col, int start_row)
1295 {
1296         int row;
1297         unsigned int lrow;
1298         GStringChunk *lines_chunk;
1299         GPtrArray *lines;
1300         gboolean result = TRUE;
1301         int col;
1302         unsigned int lcol;
1303         size_t nformats;
1304
1305         SETUP_LOCALE_SWITCH;
1306
1307         g_return_val_if_fail (parseoptions != NULL, FALSE);
1308         g_return_val_if_fail (data != NULL, FALSE);
1309         g_return_val_if_fail (IS_SHEET (sheet), FALSE);
1310
1311         if (!data_end)
1312                 data_end = data + strlen (data);
1313
1314         lines_chunk = g_string_chunk_new (100 * 1024);
1315         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1316         if (lines == NULL)
1317                 result = FALSE;
1318
1319         col = start_col;
1320         nformats = parseoptions->formats->len;
1321         for (lcol = 0; lcol < nformats; lcol++) {
1322                 GOFormat const *fmt = g_ptr_array_index (parseoptions->formats, lcol);
1323                 GnmStyle *mstyle;
1324                 gboolean want_col =
1325                         (parseoptions->col_import_array == NULL ||
1326                          parseoptions->col_import_array_len <= lcol ||
1327                          parseoptions->col_import_array[lcol]);
1328                 if (!want_col || col >= gnm_sheet_get_max_cols (sheet))
1329                         continue;
1330
1331                 if (fmt && !go_format_is_general (fmt)) {
1332                         GnmRange r;
1333                         int end_row = MIN (start_row + (int)lines->len - 1,
1334                                            gnm_sheet_get_last_row (sheet));
1335
1336                         range_init (&r, col, start_row, col, end_row);
1337                         mstyle = gnm_style_new ();
1338                         gnm_style_set_format (mstyle, fmt);
1339                         sheet_apply_style (sheet, &r, mstyle);
1340                 }
1341                 col++;
1342         }
1343
1344         START_LOCALE_SWITCH;
1345         for (row = start_row, lrow = 0;
1346              result && lrow < lines->len;
1347              row++, lrow++) {
1348                 GPtrArray *line;
1349
1350                 if (row >= gnm_sheet_get_max_rows (sheet)) {
1351                         if (!parseoptions->rows_exceeded) {
1352                                 /* FIXME: What locale?  */
1353                                 g_warning (_("There are more rows of data than "
1354                                              "there is room for in the sheet.  Extra "
1355                                              "rows will be ignored."));
1356                                 parseoptions->rows_exceeded = TRUE;
1357                         }
1358                         break;
1359                 }
1360
1361                 col = start_col;
1362                 line = g_ptr_array_index (lines, lrow);
1363
1364                 for (lcol = 0; lcol < line->len; lcol++) {
1365                         GOFormat const *fmt = lcol < nformats
1366                                 ? g_ptr_array_index (parseoptions->formats, lcol)
1367                                 : go_format_general ();
1368                         char const *text = g_ptr_array_index (line, lcol);
1369                         gboolean want_col =
1370                                 (parseoptions->col_import_array == NULL ||
1371                                  parseoptions->col_import_array_len <= lcol ||
1372                                  parseoptions->col_import_array[lcol]);
1373                         if (!want_col)
1374                                 continue;
1375
1376                         if (col >= gnm_sheet_get_max_cols (sheet)) {
1377                                 if (!parseoptions->cols_exceeded) {
1378                                         /* FIXME: What locale?  */
1379                                         g_warning (_("There are more columns of data than "
1380                                                      "there is room for in the sheet.  Extra "
1381                                                      "columns will be ignored."));
1382                                         parseoptions->cols_exceeded = TRUE;
1383                                 }
1384                                 break;
1385                         }
1386                         if (text && *text) {
1387                                 GnmCell *cell = sheet_cell_fetch (sheet, col, row);
1388                                 if (!go_format_is_text (fmt) &&
1389                                     lcol < parseoptions->formats_decimal->len &&
1390                                     g_ptr_array_index (parseoptions->formats_decimal, lcol)) {
1391                                         GOFormatFamily fam;
1392                                         GnmValue *v = format_match_decimal_number_with_locale
1393                                                 (text, &fam,
1394                                                  g_ptr_array_index (parseoptions->formats_curr, lcol),
1395                                                  g_ptr_array_index (parseoptions->formats_thousand, lcol),
1396                                                  g_ptr_array_index (parseoptions->formats_decimal, lcol));
1397                                         if (!v)
1398                                                 v = value_new_string (text);
1399                                         sheet_cell_set_value (cell, v);
1400                                 } else {
1401
1402                                         stf_cell_set_text (cell, text);
1403                                 }
1404                         }
1405                         col++;
1406                 }
1407
1408                 g_ptr_array_index (lines, lrow) = NULL;
1409                 g_ptr_array_free (line, TRUE);
1410         }
1411         END_LOCALE_SWITCH;
1412
1413         for (lcol = 0, col = start_col;
1414              lcol < parseoptions->col_import_array_len  && col < gnm_sheet_get_max_cols (sheet);
1415              lcol++) {
1416                 if (parseoptions->col_import_array == NULL ||
1417                     parseoptions->col_import_array_len <= lcol ||
1418                     parseoptions->col_import_array[lcol]) {
1419                         if (parseoptions->col_autofit_array == NULL ||
1420                             parseoptions->col_autofit_array[lcol]) {
1421                                 ColRowIndexList *list = colrow_get_index_list (col, col, NULL);
1422                                 ColRowStateGroup  *state = colrow_set_sizes (sheet, TRUE, list, -1, 0, -1);
1423                                 colrow_index_list_destroy (list);
1424                                 g_slist_free (state);
1425                         }
1426                         col++;
1427                 }
1428         }
1429
1430         g_string_chunk_free (lines_chunk);
1431         if (lines)
1432                 stf_parse_general_free (lines);
1433         if (result)
1434                 stf_read_remember_settings (sheet->workbook, parseoptions);
1435         return result;
1436 }
1437
1438 GnmCellRegion *
1439 stf_parse_region (StfParseOptions_t *parseoptions, char const *data, char const *data_end,
1440                   Workbook const *wb)
1441 {
1442         static GODateConventions const default_conv = {FALSE};
1443         GODateConventions const *date_conv = wb ? workbook_date_conv (wb) : &default_conv;
1444
1445         GnmCellRegion *cr;
1446         unsigned int row, colhigh = 0;
1447         GStringChunk *lines_chunk;
1448         GPtrArray *lines;
1449         size_t nformats;
1450
1451         SETUP_LOCALE_SWITCH;
1452
1453         g_return_val_if_fail (parseoptions != NULL, NULL);
1454         g_return_val_if_fail (data != NULL, NULL);
1455
1456         START_LOCALE_SWITCH;
1457
1458         cr = gnm_cell_region_new (NULL);
1459
1460         if (!data_end)
1461                 data_end = data + strlen (data);
1462         lines_chunk = g_string_chunk_new (100 * 1024);
1463         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1464         nformats = parseoptions->formats->len;
1465         for (row = 0; row < lines->len; row++) {
1466                 GPtrArray *line = g_ptr_array_index (lines, row);
1467                 unsigned int col, targetcol = 0;
1468                 for (col = 0; col < line->len; col++) {
1469                         if (parseoptions->col_import_array == NULL ||
1470                             parseoptions->col_import_array_len <= col ||
1471                             parseoptions->col_import_array[col]) {
1472                                 const char *text = g_ptr_array_index (line, col);
1473                                 if (text) {
1474                                         GOFormat *fmt = NULL;
1475                                         GnmValue *v;
1476                                         GnmCellCopy *cc;
1477
1478                                         if (col < nformats)
1479                                                 fmt = g_ptr_array_index (parseoptions->formats, col);
1480                                         v = format_match (text, fmt, date_conv);
1481                                         if (!v)
1482                                                 v = value_new_string (text);
1483
1484                                         cc = gnm_cell_copy_new (cr, targetcol, row);
1485                                         cc->val  = v;
1486                                         cc->texpr = NULL;
1487                                         targetcol++;
1488                                         if (targetcol > colhigh)
1489                                                 colhigh = targetcol;
1490                                 }
1491                         }
1492                 }
1493         }
1494         stf_parse_general_free (lines);
1495         g_string_chunk_free (lines_chunk);
1496
1497         END_LOCALE_SWITCH;
1498
1499         cr->cols    = (colhigh > 0) ? colhigh : 1;
1500         cr->rows    = row;
1501
1502         return cr;
1503 }
1504
/*
 * qsort-style comparison callback ordering ints ascending.
 *
 * Returns a negative, zero or positive value when *a is less than, equal
 * to or greater than *b.  The two values are compared rather than
 * subtracted, because a subtraction can overflow for operands that are
 * far apart.
 */
static int
int_sort (void const *a, void const *b)
{
        int const x = *(int const *)a;
        int const y = *(int const *)b;

        return (x > y) - (x < y);
}
1510
1511 static int
1512 count_character (GPtrArray *lines, gunichar c, double quantile)
1513 {
1514         int *counts, res;
1515         unsigned int lno, cno;
1516
1517         if (lines->len == 0)
1518                 return 0;
1519
1520         counts = g_new (int, lines->len);
1521         for (lno = cno = 0; lno < lines->len; lno++) {
1522                 int count = 0;
1523                 GPtrArray *boxline = g_ptr_array_index (lines, lno);
1524                 char const *line = g_ptr_array_index (boxline, 0);
1525
1526                 /* Ignore empty lines.  */
1527                 if (*line == 0)
1528                         continue;
1529
1530                 while (*line) {
1531                         if (g_utf8_get_char (line) == c)
1532                                 count++;
1533                         line = g_utf8_next_char (line);
1534                 }
1535
1536                 counts[cno++] = count;
1537         }
1538
1539         if (cno == 0)
1540                 res = 0;
1541         else {
1542                 unsigned int qi = (unsigned int)ceil (quantile * cno);
1543                 qsort (counts, cno, sizeof (counts[0]), int_sort);
1544                 if (qi == cno)
1545                         qi--;
1546                 res = counts[qi];
1547         }
1548
1549         g_free (counts);
1550
1551         return res;
1552 }
1553
1554 static void
1555 dump_guessed_options (const StfParseOptions_t *res)
1556 {
1557         GSList *l;
1558         char ubuffer[6 + 1];
1559         unsigned ui;
1560
1561         g_printerr ("Guessed format:\n");
1562         switch (res->parsetype) {
1563         case PARSE_TYPE_CSV:
1564                 g_printerr ("  type = sep\n");
1565                 g_printerr ("  separator = %s\n",
1566                             res->sep.chr ? res->sep.chr : "(none)");
1567                 g_printerr ("    see two as one = %s\n",
1568                             res->sep.duplicates ? "yes" : "no");
1569                 break;
1570         case PARSE_TYPE_FIXED:
1571                 g_printerr ("  type = sep\n");
1572                 break;
1573         default:
1574                 ;
1575         }
1576         g_printerr ("  trim space = %d\n", res->trim_spaces);
1577
1578         ubuffer[g_unichar_to_utf8 (res->stringindicator, ubuffer)] = 0;
1579         g_printerr ("  string indicator = %s\n", ubuffer);
1580         g_printerr ("    see two as one = %s\n",
1581                     res->indicator_2x_is_single ? "yes" : "no");
1582
1583         g_printerr ("  line terminators =");
1584         for (l = res->terminator; l; l = l->next) {
1585                 const char *t = l->data;
1586                 if (strcmp (t, "\n") == 0)
1587                         g_printerr (" unix");
1588                 else if (strcmp (t, "\r") == 0)
1589                         g_printerr (" mac");
1590                 else if (strcmp (t, "\r\n") == 0)
1591                         g_printerr (" dos");
1592                 else
1593                         g_printerr (" other");
1594         }
1595         g_printerr ("\n");
1596
1597         for (ui = 0; ui < res->formats->len; ui++) {
1598                 GOFormat const *fmt = g_ptr_array_index (res->formats, ui);
1599                 const GString *decimal = ui < res->formats_decimal->len
1600                         ? g_ptr_array_index (res->formats_decimal, ui)
1601                         : NULL;
1602                 const GString *thousand = ui < res->formats_thousand->len
1603                         ? g_ptr_array_index (res->formats_thousand, ui)
1604                         : NULL;
1605
1606                 g_printerr ("  fmt.%d = %s\n", ui, go_format_as_XL (fmt));
1607                 if (decimal)
1608                         g_printerr ("  fmt.%d.dec = %s\n", ui, decimal->str);
1609                 if (thousand)
1610                         g_printerr ("  fmt.%d.thou = %s\n", ui, thousand->str);
1611         }
1612 }
1613
1614 /**
1615  * stf_parse_options_guess:
1616  * @data: the input data.
1617  *
1618  * Returns: (transfer full): the guessed options.
1619  **/
1620 StfParseOptions_t *
1621 stf_parse_options_guess (char const *data)
1622 {
1623         StfParseOptions_t *res;
1624         GStringChunk *lines_chunk;
1625         GPtrArray *lines;
1626         int tabcount;
1627         int sepcount;
1628         gunichar sepchar = go_locale_get_arg_sep ();
1629
1630         g_return_val_if_fail (data != NULL, NULL);
1631
1632         res = stf_parse_options_new ();
1633         lines_chunk = g_string_chunk_new (100 * 1024);
1634         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1635
1636         tabcount = count_character (lines, '\t', 0.2);
1637         sepcount = count_character (lines, sepchar, 0.2);
1638
1639         /* At least one tab per line and enough to separate every
1640            would-be sepchars.  */
1641         if (tabcount >= 1 && tabcount >= sepcount - 1)
1642                 stf_parse_options_csv_set_separators (res, "\t", NULL);
1643         else {
1644                 gunichar c;
1645
1646                 /*
1647                  * Try a few more or less likely characters and pick the first
1648                  * one that occurs on at least half the lines.
1649                  *
1650                  * The order is mostly random, although ' ' and '!' which
1651                  * could very easily occur in text are put last.
1652                  */
1653                 if (count_character (lines, (c = sepchar), 0.5) > 0 ||
1654                     count_character (lines, (c = go_locale_get_col_sep ()), 0.5) > 0 ||
1655                     count_character (lines, (c = ':'), 0.5) > 0 ||
1656                     count_character (lines, (c = ','), 0.5) > 0 ||
1657                     count_character (lines, (c = ';'), 0.5) > 0 ||
1658                     count_character (lines, (c = '|'), 0.5) > 0 ||
1659                     count_character (lines, (c = '!'), 0.5) > 0 ||
1660                     count_character (lines, (c = ' '), 0.5) > 0) {
1661                         char sep[7];
1662                         sep[g_unichar_to_utf8 (c, sep)] = 0;
1663                         if (c == ' ')
1664                                 strcat (sep, "\t");
1665                         stf_parse_options_csv_set_separators (res, sep, NULL);
1666                 }
1667         }
1668
1669         // For now, always separated:
1670         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1671
1672         switch (res->parsetype) {
1673         case PARSE_TYPE_CSV: {
1674                 gboolean dups =
1675                         res->sep.chr &&
1676                         strchr (res->sep.chr, ' ') != NULL;
1677                 gboolean trim =
1678                         res->sep.chr &&
1679                         strchr (res->sep.chr, ' ') != NULL;
1680
1681                 stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1682                 stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1683                 stf_parse_options_csv_set_duplicates (res, dups);
1684                 stf_parse_options_csv_set_trim_seps (res, trim);
1685
1686                 stf_parse_options_csv_set_stringindicator (res, '"');
1687                 break;
1688         }
1689
1690         case PARSE_TYPE_FIXED:
1691                 break;
1692
1693         default:
1694                 g_assert_not_reached ();
1695         }
1696
1697         stf_parse_general_free (lines);
1698         g_string_chunk_free (lines_chunk);
1699
1700         stf_parse_options_guess_formats (res, data);
1701
1702         if (gnm_debug_flag ("stf"))
1703                 dump_guessed_options (res);
1704
1705         return res;
1706 }
1707
1708 /**
1709  * stf_parse_options_guess_csv:
1710  * @data: the CSV input data.
1711  *
1712  * Returns: (transfer full): the guessed options.
1713  **/
1714 StfParseOptions_t *
1715 stf_parse_options_guess_csv (char const *data)
1716 {
1717         StfParseOptions_t *res;
1718         GStringChunk *lines_chunk;
1719         GPtrArray *lines;
1720         char *sep = NULL;
1721         char const *quoteline = NULL;
1722         int pass;
1723         gunichar stringind = '"';
1724
1725         g_return_val_if_fail (data != NULL, NULL);
1726
1727         res = stf_parse_options_new ();
1728         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1729         stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1730         stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1731         stf_parse_options_csv_set_duplicates (res, FALSE);
1732         stf_parse_options_csv_set_trim_seps (res, FALSE);
1733         stf_parse_options_csv_set_stringindicator (res, stringind);
1734
1735         lines_chunk = g_string_chunk_new (100 * 1024);
1736         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1737
1738         /*
1739          * Find a line containing a quote; skip first line unless it is
1740          * the only one.  Prefer a line with the quote first.
1741          */
1742         for (pass = 1; !quoteline && pass <= 2; pass++) {
1743                 size_t lno;
1744                 for (lno = MIN (1, lines->len - 1);
1745                      !quoteline && lno < lines->len;
1746                      lno++) {
1747                         GPtrArray *boxline = g_ptr_array_index (lines, lno);
1748                         const char *line = g_ptr_array_index (boxline, 0);
1749                         switch (pass) {
1750                         case 1:
1751                                 if (g_utf8_get_char (line) == stringind)
1752                                         quoteline = line;
1753                                 break;
1754                         case 2:
1755                                 if (my_utf8_strchr (line, stringind))
1756                                         quoteline = line;
1757                                 break;
1758                         }
1759                 }
1760         }
1761
1762         if (quoteline) {
1763                 const char *p0 = my_utf8_strchr (quoteline, stringind);
1764                 const char *p = p0;
1765
1766                 do {
1767                         p = g_utf8_next_char (p);
1768                 } while (*p && g_utf8_get_char (p) != stringind);
1769                 if (*p) p = g_utf8_next_char (p);
1770                 while (*p && g_unichar_isspace (g_utf8_get_char (p)))
1771                         p = g_utf8_next_char (p);
1772                 if (*p) {
1773                         /* Use the character after the quote.  */
1774                         sep = g_strndup (p, g_utf8_next_char (p) - p);
1775                 } else {
1776                         /* Try to use character before the quote.  */
1777                         while (p0 > quoteline && !sep) {
1778                                 p = p0;
1779                                 p0 = g_utf8_prev_char (p0);
1780                                 if (!g_unichar_isspace (g_utf8_get_char (p0)))
1781                                         sep = g_strndup (p0, p - p0);
1782                         }
1783                 }
1784         }
1785
1786         if (!sep)
1787                 sep = g_strdup (",");
1788         stf_parse_options_csv_set_separators (res, sep, NULL);
1789         g_free (sep);
1790
1791         stf_parse_general_free (lines);
1792         g_string_chunk_free (lines_chunk);
1793
1794         stf_parse_options_guess_formats (res, data);
1795
1796         if (gnm_debug_flag ("stf"))
1797                 dump_guessed_options (res);
1798
1799         return res;
1800 }
1801
/*
 * Bit flags for the column formats that are still considered possible
 * while guessing.  The low bits are the date orders, the 0x10/0x20 bits
 * are the two decimal-separator conventions.
 */
typedef enum {
        STF_GUESS_DATE_DMY = 1 << 0,
        STF_GUESS_DATE_MDY = 1 << 1,
        STF_GUESS_DATE_YMD = 1 << 2,

        STF_GUESS_NUMBER_DEC_POINT = 1 << 4,
        STF_GUESS_NUMBER_DEC_COMMA = 1 << 5,
        STF_GUESS_NUMBER_DEC_EITHER = (STF_GUESS_NUMBER_DEC_POINT |
                                       STF_GUESS_NUMBER_DEC_COMMA),

        STF_GUESS_ALL = (STF_GUESS_DATE_DMY |
                         STF_GUESS_DATE_MDY |
                         STF_GUESS_DATE_YMD |
                         STF_GUESS_NUMBER_DEC_EITHER)
} StfGuessFormats;
1813
1814 static void
1815 do_check_date (const char *data, StfGuessFormats flag,
1816                gboolean mbd, gboolean ybm,
1817                unsigned *possible,
1818                GODateConventions const *date_conv)
1819 {
1820         GnmValue *v;
1821         gboolean this_mbd, this_ybm;
1822         int imbd;
1823
1824         if (!(*possible & flag))
1825                 return;
1826
1827         v = format_match_datetime (data, date_conv, mbd, TRUE, FALSE);
1828         if (!v || !VALUE_FMT (v))
1829                 goto fail;
1830
1831         imbd = go_format_month_before_day (VALUE_FMT (v));
1832         this_mbd = (imbd >= 1);
1833         this_ybm = (imbd == 2);
1834         if (mbd != this_mbd || ybm != this_ybm)
1835                 goto fail;
1836
1837         goto done;
1838
1839 fail:
1840         *possible &= ~flag;
1841 done:
1842         value_release (v);
1843 }
1844
1845
1846 static void
1847 do_check_number (const char *data, StfGuessFormats flag,
1848                  const GString *dec, const GString *thousand, const GString *curr,
1849                  unsigned *possible, int *decimals)
1850 {
1851         GnmValue *v;
1852         GOFormatFamily family;
1853         const char *pthou;
1854
1855         if (!(*possible & flag))
1856                 return;
1857
1858         v = format_match_decimal_number_with_locale (data, &family, curr, thousand, dec);
1859         if (!v)
1860                 goto fail;
1861
1862         if (*decimals != -2) {
1863                 const char *pdec = strstr (data, dec->str);
1864                 int this_decimals = 0;
1865                 if (pdec) {
1866                         pdec += dec->len;
1867                         while (g_ascii_isdigit (*pdec)) {
1868                                 pdec++;
1869                                 this_decimals++;
1870                         }
1871                 }
1872                 if (*decimals == -1)
1873                         *decimals = this_decimals;
1874                 else if (*decimals != this_decimals)
1875                         *decimals = -2;
1876         }
1877
1878         pthou = strstr (data, thousand->str);
1879         if (pthou) {
1880                 const char *p;
1881                 int digits = 0, nonzero_digits = 0;
1882                 for (p = data; p < pthou; p = g_utf8_next_char (p)) {
1883                         if (g_unichar_isdigit (g_utf8_get_char (p))) {
1884                                 digits++;
1885                                 if (*p != '0')
1886                                         nonzero_digits++;
1887                         }
1888                 }
1889                 // "-.222" implies that "." is not a thousands separator.
1890                 // "0.222" implies that "." is not a thousands separator.
1891                 // "12345,555" implies that "," is not a thousands separator.
1892                 if (nonzero_digits == 0 || digits > 3)
1893                         goto fail;
1894         }
1895
1896         goto done;
1897
1898 fail:
1899         *possible &= ~flag;
1900 done:
1901         value_release (v);
1902 }
1903
1904
1905 /**
1906  * stf_parse_options_guess_formats:
1907  * @data: the CSV input data.
1908  *
1909  * This function attempts to recognize data formats on a column-by-column
1910  * basis under the assumption that the data in a text file will generally
1911  * use the same data formats.
1912  *
1913  * This is useful because not all values give sufficient information by
1914  * themselves to tell what format the data is in.  For example, "1/2/2000"
1915  * is likely to be a date in year 2000, but it is not clear if it is in
1916  * January or February.  If another value in the same column is "31/1/1999"
1917  * then it is likely that the former date was in February.
1918  *
1919  * Likewise, a value of "123,456" could mean either 1.23456e5 or 1.23456e2.
1920  * A later value of "111,200.22" would clear up the confusion.
1921  *
1922  **/
1923 void
1924 stf_parse_options_guess_formats (StfParseOptions_t *po, char const *data)
1925 {
1926         GStringChunk *lines_chunk;
1927         GPtrArray *lines;
1928         unsigned lno, col, colcount, sline;
1929         GODateConventions const *date_conv = go_date_conv_from_str ("Lotus:1900");
1930         GString *s_comma = g_string_new (",");
1931         GString *s_dot = g_string_new (".");
1932         GString *s_dollar = g_string_new ("$");
1933         gboolean debug = gnm_debug_flag ("stf");
1934
1935         g_ptr_array_set_size (po->formats, 0);
1936         g_ptr_array_set_size (po->formats_decimal, 0);
1937         g_ptr_array_set_size (po->formats_thousand, 0);
1938         g_ptr_array_set_size (po->formats_curr, 0);
1939
1940         lines_chunk = g_string_chunk_new (100 * 1024);
1941         lines = stf_parse_general (po, lines_chunk, data, data + strlen (data));
1942
1943         colcount = 0;
1944         for (lno = 0; lno < lines->len; lno++) {
1945                 GPtrArray *line = g_ptr_array_index (lines, lno);
1946                 colcount = MAX (colcount, line->len);
1947         }
1948
1949         // Ignore first line unless it is the only one
1950         sline = MIN ((int)lines->len - 1, 1);
1951
1952         g_ptr_array_set_size (po->formats, colcount);
1953         g_ptr_array_set_size (po->formats_decimal, colcount);
1954         g_ptr_array_set_size (po->formats_thousand, colcount);
1955         g_ptr_array_set_size (po->formats_curr, colcount);
1956         for (col = 0; col < colcount; col++) {
1957                 unsigned possible = STF_GUESS_ALL;
1958                 GOFormat *fmt = NULL;
1959                 gboolean seen_dot = FALSE;
1960                 gboolean seen_comma = FALSE;
1961                 int decimals_if_point = -1; // -1: unset; -2: inconsistent; >=0: count
1962                 int decimals_if_comma = -1; // -1: unset; -2: inconsistent; >=0: count
1963
1964                 for (lno = sline; possible && lno < lines->len; lno++) {
1965                         GPtrArray *line = g_ptr_array_index (lines, lno);
1966                         const char *data = col < line->len ? g_ptr_array_index (line, col) : "";
1967                         unsigned prev_possible = possible;
1968
1969                         if (*data == 0 || data[0] == '\'')
1970                                 continue;
1971
1972                         do_check_date (data, STF_GUESS_DATE_DMY, FALSE, FALSE, &possible, date_conv);
1973                         do_check_date (data, STF_GUESS_DATE_MDY, TRUE, FALSE, &possible, date_conv);
1974                         do_check_date (data, STF_GUESS_DATE_YMD, TRUE, TRUE, &possible, date_conv);
1975
1976                         if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER) {
1977                                 const char *pdot = strstr (data, s_dot->str);
1978                                 const char *pcomma = strstr (data, s_comma->str);
1979                                 if (pdot && pcomma) {
1980                                         // Both -- last one is the decimal separator
1981                                         if (pdot > pcomma)
1982                                                 possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1983                                         else
1984                                                 possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1985                                 } else if (pdot && strstr (pdot + s_dot->len, s_dot->str)) {
1986                                         // Two dots so they are thousands separators
1987                                         possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1988                                 } else if (pcomma && strstr (pcomma + s_comma->len, s_comma->str)) {
1989                                         // Two commas so they are thousands separators
1990                                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1991                                 }
1992
1993                                 seen_dot = seen_dot || (pdot != 0);
1994                                 seen_comma = seen_comma || (pcomma != 0);
1995                         }
1996                         do_check_number (data, STF_GUESS_NUMBER_DEC_POINT,
1997                                          s_dot, s_comma, s_dollar,
1998                                          &possible, &decimals_if_point);
1999                         do_check_number (data, STF_GUESS_NUMBER_DEC_COMMA,
2000                                          s_comma, s_dot, s_dollar,
2001                                          &possible, &decimals_if_comma);
2002
2003                         if (possible != prev_possible && debug)
2004                                 g_printerr ("col=%d; after [%s] possible=0x%x\n", col, data, possible);
2005                 }
2006
2007                 if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER &&
2008                     !seen_dot && !seen_comma) {
2009                         // It doesn't matter what the separators are
2010                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
2011                 }
2012
2013                 switch (possible) {
2014                 case STF_GUESS_DATE_DMY:
2015                         fmt = go_format_new_from_XL ("d-mmm-yyyy");
2016                         break;
2017                 case STF_GUESS_DATE_MDY:
2018                         fmt = go_format_new_from_XL ("m/d/yyyy");
2019                         break;
2020                 case STF_GUESS_DATE_YMD:
2021                         fmt = go_format_new_from_XL ("yyyy-mm-dd");
2022                         break;
2023                 case STF_GUESS_NUMBER_DEC_POINT:
2024                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (".");
2025                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (",");
2026                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2027                         if (decimals_if_point > 0) {
2028                                 // Don't set format if decimals is zero
2029                                 GString *fmt_str = g_string_new (NULL);
2030                                 go_format_generate_number_str (fmt_str, 1, decimals_if_point, seen_comma, FALSE, FALSE, "", "");
2031                                 fmt = go_format_new_from_XL (fmt_str->str);
2032                                 g_string_free (fmt_str, TRUE);
2033                         }
2034                         break;
2035                 case STF_GUESS_NUMBER_DEC_COMMA:
2036                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (",");
2037                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (".");
2038                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2039                         if (decimals_if_comma > 0) {
2040                                 // Don't set format if decimals is zero
2041                                 GString *fmt_str = g_string_new (NULL);
2042                                 go_format_generate_number_str (fmt_str, 1, decimals_if_comma, seen_dot, FALSE, FALSE, "", "");
2043                                 fmt = go_format_new_from_XL (fmt_str->str);
2044                                 g_string_free (fmt_str, TRUE);
2045                         }
2046                         break;
2047                 default:
2048                         break;
2049                 }
2050
2051                 if (!fmt)
2052                         fmt = go_format_ref (go_format_general ());
2053                 g_ptr_array_index (po->formats, col) = fmt;
2054         }
2055
2056         stf_parse_general_free (lines);
2057         g_string_chunk_free (lines_chunk);
2058
2059         g_string_free (s_dot, TRUE);
2060         g_string_free (s_comma, TRUE);
2061         g_string_free (s_dollar, TRUE);
2062 }