src/stf-parse.c

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * stf-parse.c : Structured Text Format parser. (STF)
   4  *               A general purpose engine for parsing data
   5  *               in CSV and Fixed width format.
   6  *
   7  *
   8  * Copyright (C) Almer. S. Tigelaar.
   9  * EMail: almer1@dds.nl or almer-t@bigfoot.com
  10  *
  11  * Copyright (C) 2003 Andreas J. Guelzow <aguelzow@taliesin.ca>
  12  * Copyright (C) 2003,2008-2009 Morten Welinder <terra@gnome.org>
  13  *
  14  * This program is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU General Public License as published by
  16  * the Free Software Foundation; either version 2 of the License, or
  17  * (at your option) any later version.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  26  */
  27
  28 #include <gnumeric-config.h>
  29 #include <glib/gi18n-lib.h>
  30 #include "gnumeric.h"
  31 #include "stf-parse.h"
  32 #include "stf-export.h"
  33
  34 #include "workbook.h"
  35 #include "cell.h"
  36 #include "sheet.h"
  37 #include "expr.h"
  38 #include "clipboard.h"
  39 #include "sheet-style.h"
  40 #include "value.h"
  41 #include "mstyle.h"
  42 #include "number-match.h"
  43 #include "gutils.h"
  44 #include "parse-util.h"
  45 #include "number-match.h"
  46 #include "gnm-format.h"
  47 #include "ranges.h"
  48 #include <goffice/goffice.h>
  49
  50 #include <stdlib.h>
  51 #include <locale.h>
  52 #include <string.h>
  53
/*
 * Locale switching helpers.  START_LOCALE_SWITCH expands to code that
 * reads a variable named parseoptions, and both START_LOCALE_SWITCH and
 * END_LOCALE_SWITCH use the oldlocale variable that SETUP_LOCALE_SWITCH
 * declares, so all three belong in the same function.
 */

/* Declares oldlocale; it stays NULL unless a locale switch takes place. */
#define SETUP_LOCALE_SWITCH char *oldlocale = NULL

/*
 * When parseoptions->locale is set: keeps a copy of what
 * go_setlocale (LC_ALL, NULL) returns in oldlocale, then switches LC_ALL
 * to parseoptions->locale.
 */
#define START_LOCALE_SWITCH if (parseoptions->locale) {\
oldlocale = g_strdup(go_setlocale (LC_ALL, NULL)); \
go_setlocale(LC_ALL, parseoptions->locale);}

/*
 * When oldlocale was set: switches LC_ALL back to it and frees the copy.
 * Note that oldlocale is not reset to NULL afterwards.
 */
#define END_LOCALE_SWITCH if (oldlocale) {\
go_setlocale(LC_ALL, oldlocale);\
g_free (oldlocale);}
  63
/*
 * Source_t: parse cursor used for interchanging parsing information
 * between the low level parse functions.
 */
typedef struct {
        GStringChunk *chunk;   /* Receives a copy of every field the parsers produce */
        char const *position;  /* Indicates the current position within data */

        /* Used internally for fixed width parsing */
        int splitpos;          /* Indicates current position in splitpositions array */
        int linepos;           /* Position on the current line, counted in UTF-8 characters */
} Source_t;
  73
/*
 * Struct used for autodiscovery.
 * NOTE(review): no user of this type is visible in this part of the file;
 * the field descriptions below are assumptions to be confirmed.
 */
typedef struct {
        int start;  /* presumably where a discovered span begins -- TODO confirm */
        int stop;   /* presumably where that span ends -- TODO confirm */
} AutoDiscovery_t;
  79
  80 /*
  81  * Some silly dude make the length field an unsigned int.  C just does
  82  * not deal very well with that.
  83  */
  84 static inline int
  85 my_garray_len (GArray const *a)
  86 {
  87         return (int)a->len;
  88 }
  89
  90 static char *
  91 my_utf8_strchr (const char *p, gunichar uc)
  92 {
  93         return uc < 0x7f ? strchr (p, uc) : g_utf8_strchr (p, -1, uc);
  94 }
  95
  96 static int
  97 compare_terminator (char const *s, StfParseOptions_t *parseoptions)
  98 {
  99         guchar const *us = (guchar const *)s;
 100         GSList *l;
 101
 102         if (*us > parseoptions->compiled_terminator.max ||
 103             *us < parseoptions->compiled_terminator.min)
 104                 return 0;
 105
 106         for (l = parseoptions->terminator; l; l = l->next) {
 107                 char const *term = l->data;
 108                 char const *d = s;
 109
 110                 while (*term) {
 111                         if (*d != *term)
 112                                 goto next;
 113                         term++;
 114                         d++;
 115                 }
 116                 return d - s;
 117
 118         next:
 119                 ;
 120         }
 121         return 0;
 122 }
 123
 124
 125 /*******************************************************************************************************
 126  * STF PARSE OPTIONS : StfParseOptions related
 127  *******************************************************************************************************/
 128
 129 static void
 130 gnm_g_string_free (GString *s)
 131 {
 132         if (s) g_string_free (s, TRUE);
 133 }
 134
 135
 136 /**
 137  * stf_parse_options_new:
 138  *
 139  * This will return a new StfParseOptions_t struct.
 140  * The struct should, after being used, freed with stf_parse_options_free.
 141  **/
 142 static StfParseOptions_t *
 143 stf_parse_options_new (void)
 144 {
 145         StfParseOptions_t* parseoptions = g_new0 (StfParseOptions_t, 1);
 146
 147         parseoptions->parsetype   = PARSE_TYPE_NOTSET;
 148
 149         parseoptions->terminator  = NULL;
 150         stf_parse_options_add_line_terminator (parseoptions, "\r\n");
 151         stf_parse_options_add_line_terminator (parseoptions, "\n");
 152         stf_parse_options_add_line_terminator (parseoptions, "\r");
 153
 154         parseoptions->trim_spaces = (TRIM_TYPE_RIGHT | TRIM_TYPE_LEFT);
 155         parseoptions->locale = NULL;
 156
 157         parseoptions->splitpositions = NULL;
 158         stf_parse_options_fixed_splitpositions_clear (parseoptions);
 159
 160         parseoptions->stringindicator = '"';
 161         parseoptions->indicator_2x_is_single = TRUE;
 162         parseoptions->sep.duplicates = FALSE;
 163         parseoptions->trim_seps = FALSE;
 164
 165         parseoptions->sep.str = NULL;
 166         parseoptions->sep.chr = NULL;
 167
 168         parseoptions->col_autofit_array = NULL;
 169         parseoptions->col_import_array = NULL;
 170         parseoptions->col_import_array_len = 0;
 171         parseoptions->formats = g_ptr_array_new_with_free_func ((GDestroyNotify)go_format_unref);
 172         parseoptions->formats_decimal = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 173         parseoptions->formats_thousand = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 174         parseoptions->formats_curr = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 175
 176         parseoptions->cols_exceeded = FALSE;
 177         parseoptions->rows_exceeded = FALSE;
 178         parseoptions->ref_count = 1;
 179
 180         return parseoptions;
 181 }
 182
 183 /**
 184  * stf_parse_options_free:
 185  *
 186  * will free @parseoptions, note that this will not free the splitpositions
 187  * member (GArray) of the struct, the caller is responsible for that.
 188  **/
 189 void
 190 stf_parse_options_free (StfParseOptions_t *parseoptions)
 191 {
 192         g_return_if_fail (parseoptions != NULL);
 193
 194         if (parseoptions->ref_count-- > 1)
 195                 return;
 196
 197         g_free (parseoptions->col_import_array);
 198         g_free (parseoptions->col_autofit_array);
 199         g_free (parseoptions->locale);
 200         g_free (parseoptions->sep.chr);
 201
 202         if (parseoptions->sep.str) {
 203                 GSList *l;
 204
 205                 for (l = parseoptions->sep.str; l != NULL; l = l->next)
 206                         g_free ((char *) l->data);
 207                 g_slist_free (parseoptions->sep.str);
 208         }
 209
 210         g_array_free (parseoptions->splitpositions, TRUE);
 211
 212         stf_parse_options_clear_line_terminator (parseoptions);
 213
 214         g_ptr_array_free (parseoptions->formats, TRUE);
 215         g_ptr_array_free (parseoptions->formats_decimal, TRUE);
 216         g_ptr_array_free (parseoptions->formats_thousand, TRUE);
 217         g_ptr_array_free (parseoptions->formats_curr, TRUE);
 218
 219         g_free (parseoptions);
 220 }
 221
 222 static StfParseOptions_t *
 223 stf_parse_options_ref (StfParseOptions_t *parseoptions)
 224 {
 225         parseoptions->ref_count++;
 226         return parseoptions;
 227 }
 228
 229 GType
 230 stf_parse_options_get_type (void)
 231 {
 232         static GType t = 0;
 233
 234         if (t == 0) {
 235                 t = g_boxed_type_register_static ("StfParseOptions_t",
 236                          (GBoxedCopyFunc)stf_parse_options_ref,
 237                          (GBoxedFreeFunc)stf_parse_options_free);
 238         }
 239         return t;
 240 }
 241
 242 void
 243 stf_parse_options_set_type (StfParseOptions_t *parseoptions, StfParseType_t const parsetype)
 244 {
 245         g_return_if_fail (parseoptions != NULL);
 246         g_return_if_fail (parsetype == PARSE_TYPE_CSV || parsetype == PARSE_TYPE_FIXED);
 247
 248         parseoptions->parsetype = parsetype;
 249 }
 250
 251 static gint
 252 long_string_first (gchar const *a, gchar const *b)
 253 {
 254         /* This actually is UTF-8 safe.  */
 255         return strlen (b) - strlen (a);
 256 }
 257
 258 static void
 259 compile_terminators (StfParseOptions_t *parseoptions)
 260 {
 261         GSList *l;
 262
 263         parseoptions->terminator =
 264                 g_slist_sort (parseoptions->terminator,
 265                               (GCompareFunc)long_string_first);
 266         parseoptions->compiled_terminator.min = 255;
 267         parseoptions->compiled_terminator.max = 0;
 268         for (l = parseoptions->terminator; l; l = l->next) {
 269                 const guchar *term = l->data;
 270                 parseoptions->compiled_terminator.min =
 271                         MIN (parseoptions->compiled_terminator.min, *term);
 272                 parseoptions->compiled_terminator.max =
 273                         MAX (parseoptions->compiled_terminator.max, *term);
 274         }
 275 }
 276
 277 /**
 278  * stf_parse_options_add_line_terminator:
 279  *
 280  * This will add to the line terminators, in both the Fixed width and CSV delimited importers
 281  * this indicates the end of a row.
 282  *
 283  **/
 284 void
 285 stf_parse_options_add_line_terminator (StfParseOptions_t *parseoptions, char const *terminator)
 286 {
 287         g_return_if_fail (parseoptions != NULL);
 288         g_return_if_fail (terminator != NULL && *terminator != 0);
 289
 290         GO_SLIST_PREPEND (parseoptions->terminator, g_strdup (terminator));
 291         compile_terminators (parseoptions);
 292 }
 293
 294 /**
 295  * stf_parse_options_clear_line_terminator:
 296  *
 297  * This will clear the line terminator, in both the Fixed width and CSV delimited importers
 298  * this indicates the end of a row.
 299  *
 300  **/
 301 void
 302 stf_parse_options_clear_line_terminator (StfParseOptions_t *parseoptions)
 303 {
 304         g_return_if_fail (parseoptions != NULL);
 305
 306         g_slist_free_full (parseoptions->terminator, g_free);
 307         parseoptions->terminator = NULL;
 308         compile_terminators (parseoptions);
 309 }
 310
 311 /**
 312  * stf_parse_options_set_trim_spaces:
 313  *
 314  * If enabled will trim spaces in every parsed field on left and/or right
 315  * sides.
 316  **/
 317 void
 318 stf_parse_options_set_trim_spaces (StfParseOptions_t *parseoptions, StfTrimType_t const trim_spaces)
 319 {
 320         g_return_if_fail (parseoptions != NULL);
 321
 322         parseoptions->trim_spaces = trim_spaces;
 323 }
 324
 325 /**
 326  * stf_parse_options_csv_set_separators:
 327  * @parseoptions: #StfParseOptions_t
 328  * @character:
 329  * @seps: (element-type utf8): the separators to be used
 330  *
 331  * A copy is made of the parameters.
 332  **/
 333 void
 334 stf_parse_options_csv_set_separators (StfParseOptions_t *parseoptions,
 335                                       char const *character,
 336                                       GSList const *seps)
 337 {
 338         g_return_if_fail (parseoptions != NULL);
 339
 340         g_free (parseoptions->sep.chr);
 341         parseoptions->sep.chr = g_strdup (character);
 342
 343         g_slist_free_full (parseoptions->sep.str, g_free);
 344         parseoptions->sep.str =
 345                 g_slist_copy_deep ((GSList *)seps, (GCopyFunc)g_strdup, NULL);
 346 }
 347
 348 void
 349 stf_parse_options_csv_set_stringindicator (StfParseOptions_t *parseoptions, gunichar const stringindicator)
 350 {
 351         g_return_if_fail (parseoptions != NULL);
 352
 353         parseoptions->stringindicator = stringindicator;
 354 }
 355
 356 /**
 357  * stf_parse_options_csv_set_indicator_2x_is_single:
 358  * @indic_2x: a boolean value indicating whether we want to see two
 359  *              adjacent string indicators as a single string indicator
 360  *              that is part of the cell, rather than a terminator.
 361  **/
 362 void
 363 stf_parse_options_csv_set_indicator_2x_is_single (StfParseOptions_t *parseoptions,
 364                                                   gboolean const indic_2x)
 365 {
 366         g_return_if_fail (parseoptions != NULL);
 367
 368         parseoptions->indicator_2x_is_single = indic_2x;
 369 }
 370
 371 /**
 372  * stf_parse_options_csv_set_duplicates:
 373  * @parseoptions:
 374  * @duplicates: a boolean value indicating whether we want to see two
 375  *               separators right behind each other as one
 376  **/
 377 void
 378 stf_parse_options_csv_set_duplicates (StfParseOptions_t *parseoptions, gboolean const duplicates)
 379 {
 380         g_return_if_fail (parseoptions != NULL);
 381
 382         parseoptions->sep.duplicates = duplicates;
 383 }
 384
 385 /**
 386  * stf_parse_options_csv_set_trim_seps:
 387  * @trim_seps: a boolean value indicating whether we want to ignore
 388  *               separators at the beginning of lines
 389  **/
 390 void
 391 stf_parse_options_csv_set_trim_seps (StfParseOptions_t *parseoptions, gboolean const trim_seps)
 392 {
 393         g_return_if_fail (parseoptions != NULL);
 394
 395         parseoptions->trim_seps = trim_seps;
 396 }
 397
 398 /**
 399  * stf_parse_options_fixed_splitpositions_clear:
 400  *
 401  * This will clear the splitpositions (== points on which a line is split)
 402  **/
 403 void
 404 stf_parse_options_fixed_splitpositions_clear (StfParseOptions_t *parseoptions)
 405 {
 406         int minus_one = -1;
 407         g_return_if_fail (parseoptions != NULL);
 408
 409         if (parseoptions->splitpositions)
 410                 g_array_free (parseoptions->splitpositions, TRUE);
 411         parseoptions->splitpositions = g_array_new (FALSE, FALSE, sizeof (int));
 412
 413         g_array_append_val (parseoptions->splitpositions, minus_one);
 414 }
 415
 416 /**
 417  * stf_parse_options_fixed_splitpositions_add:
 418  *
 419  * @position will be added to the splitpositions.
 420  **/
 421 void
 422 stf_parse_options_fixed_splitpositions_add (StfParseOptions_t *parseoptions, int position)
 423 {
 424         unsigned int ui;
 425
 426         g_return_if_fail (parseoptions != NULL);
 427         g_return_if_fail (position >= 0);
 428
 429         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 430                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 431                 if (position == here)
 432                         return;
 433                 if (position < here)
 434                         break;
 435         }
 436
 437         g_array_insert_val (parseoptions->splitpositions, ui, position);
 438 }
 439
 440 void
 441 stf_parse_options_fixed_splitpositions_remove (StfParseOptions_t *parseoptions, int position)
 442 {
 443         unsigned int ui;
 444
 445         g_return_if_fail (parseoptions != NULL);
 446         g_return_if_fail (position >= 0);
 447
 448         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 449                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 450                 if (position == here)
 451                         g_array_remove_index (parseoptions->splitpositions, ui);
 452                 if (position <= here)
 453                         return;
 454         }
 455 }
 456
 457 int
 458 stf_parse_options_fixed_splitpositions_count (StfParseOptions_t *parseoptions)
 459 {
 460         return parseoptions->splitpositions->len;
 461 }
 462
 463 int
 464 stf_parse_options_fixed_splitpositions_nth (StfParseOptions_t *parseoptions, int n)
 465 {
 466         return g_array_index (parseoptions->splitpositions, int, n);
 467 }
 468
 469
 470 /**
 471  * stf_parse_options_valid:
 472  * @parseoptions: an import options struct
 473  *
 474  * Checks if @parseoptions is correctly filled
 475  *
 476  * returns : TRUE if it is correctly filled, FALSE otherwise.
 477  **/
 478 static gboolean
 479 stf_parse_options_valid (StfParseOptions_t *parseoptions)
 480 {
 481         g_return_val_if_fail (parseoptions != NULL, FALSE);
 482
 483         if (parseoptions->parsetype == PARSE_TYPE_FIXED) {
 484                 if (!parseoptions->splitpositions) {
 485                         g_warning ("STF: No splitpositions in struct");
 486                         return FALSE;
 487                 }
 488         }
 489
 490         return TRUE;
 491 }
 492
 493 /*******************************************************************************************************
 494  * STF PARSE : The actual routines that do the 'trick'
 495  *******************************************************************************************************/
 496
 497 static void
 498 trim_spaces_inplace (char *field, StfParseOptions_t const *parseoptions)
 499 {
 500         if (!field) return;
 501
 502         if (parseoptions->trim_spaces & TRIM_TYPE_LEFT) {
 503                 char *s = field;
 504
 505                 while (g_unichar_isspace (g_utf8_get_char (s)))
 506                         s = g_utf8_next_char (s);
 507
 508                 if (s != field)
 509                         memmove (field, s, 1 + strlen (s));
 510         }
 511
 512         if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 513                 char *s = field + strlen (field);
 514
 515                 while (field != s) {
 516                         s = g_utf8_prev_char (s);
 517                         if (!g_unichar_isspace (g_utf8_get_char (s)))
 518                                 break;
 519                         *s = 0;
 520                 }
 521         }
 522 }
 523
 524 /**
 525  * stf_parse_csv_is_separator:
 526  *
 527  * returns NULL if @character is not a separator, a pointer to the character
 528  * after the separator otherwise.
 529  **/
 530 static char const *
 531 stf_parse_csv_is_separator (char const *character, char const *chr, GSList const *str)
 532 {
 533         g_return_val_if_fail (character != NULL, NULL);
 534
 535         if (*character == 0)
 536                 return NULL;
 537
 538         if (str) {
 539                 GSList const *l;
 540
 541                 for (l = str; l != NULL; l = l->next) {
 542                         char const *s = l->data;
 543                         char const *r;
 544                         glong cnt;
 545                         glong const len = g_utf8_strlen (s, -1);
 546
 547                         /* Don't compare past the end of the buffer! */
 548                         for (r = character, cnt = 0; cnt < len; cnt++, r = g_utf8_next_char (r))
 549                                 if (*r == '\0')
 550                                         break;
 551
 552                         if ((cnt == len) && (memcmp (character, s, len) == 0))
 553                                 return g_utf8_offset_to_pointer (character, len);
 554                 }
 555         }
 556
 557         if (chr && my_utf8_strchr (chr, g_utf8_get_char (character)))
 558                 return g_utf8_next_char(character);
 559
 560         return NULL;
 561 }
 562
 563 /*
 564  * stf_parse_eat_separators:
 565  *
 566  * skip over leading separators
 567  *
 568  */
 569
 570 static void
 571 stf_parse_eat_separators (Source_t *src, StfParseOptions_t *parseoptions)
 572 {
 573         char const *cur, *next;
 574
 575         g_return_if_fail (src != NULL);
 576         g_return_if_fail (parseoptions != NULL);
 577
 578         cur = src->position;
 579
 580         if (*cur == '\0' || compare_terminator (cur, parseoptions))
 581                 return;
 582         while ((next = stf_parse_csv_is_separator (cur, parseoptions->sep.chr, parseoptions->sep.str)))
 583                 cur = next;
 584         src->position = cur;
 585         return;
 586 }
 587
 588
/* Outcome of one call to stf_parse_csv_cell.  */
typedef enum {
        STF_CELL_ERROR,         /* A precondition check failed */
        STF_CELL_EOF,           /* Reached the terminating NUL before any field content */
        STF_CELL_EOL,           /* Found (and consumed) a line terminator before any field content */
        STF_CELL_FIELD_NO_SEP,  /* Parsed a field that was not followed by a separator */
        STF_CELL_FIELD_SEP      /* Parsed a field and consumed the separator behind it */
} StfParseCellRes;
 596
/*
 * stf_parse_csv_cell:
 * @text: buffer that receives the field content (appended to)
 * @src: parse cursor; src->position is advanced past what was consumed
 * @parseoptions: separators, terminators, string indicator and trim flags
 *
 * Parses at most one CSV field starting at src->position.
 *
 * Returns STF_CELL_EOF or STF_CELL_EOL when the data or the line ends
 * before a field starts (a line terminator is consumed in that case),
 * STF_CELL_FIELD_SEP when a field and the separator behind it were
 * consumed, STF_CELL_FIELD_NO_SEP when a field ended without a separator,
 * and STF_CELL_ERROR when a precondition check fails.
 */
static StfParseCellRes
stf_parse_csv_cell (GString *text, Source_t *src, StfParseOptions_t *parseoptions)
{
        char const *cur;
        gboolean saw_sep = FALSE;

        g_return_val_if_fail (src != NULL, STF_CELL_ERROR);
        g_return_val_if_fail (parseoptions != NULL, STF_CELL_ERROR);

        cur = src->position;
        g_return_val_if_fail (cur != NULL, STF_CELL_ERROR);

        /* Skip whitespace, but stop at line terminators.  */
        while (1) {
                int term_len;

                if (*cur == 0) {
                        src->position = cur;
                        return STF_CELL_EOF;
                }

                term_len = compare_terminator (cur, parseoptions);
                if (term_len) {
                        /* The terminator itself is consumed here.  */
                        src->position = cur + term_len;
                        return STF_CELL_EOL;
                }

                /* Leading white space is only skipped when left trimming is on.  */
                if ((parseoptions->trim_spaces & TRIM_TYPE_LEFT) == 0)
                        break;

                /* A separator is never skipped as white space, even if it is a space.  */
                if (stf_parse_csv_is_separator (cur, parseoptions->sep.chr,
                                                parseoptions->sep.str))
                        break;

                if (!g_unichar_isspace (g_utf8_get_char (cur)))
                        break;
                cur = g_utf8_next_char (cur);
        }

        if (parseoptions->stringindicator != 0 &&
            g_utf8_get_char (cur) == parseoptions->stringindicator) {
                /*
                 * Quoted field.  Line terminators are not checked inside
                 * the quotes, so they become part of the field.
                 */
                cur = g_utf8_next_char (cur);
                while (*cur) {
                        gunichar uc = g_utf8_get_char (cur);
                        cur = g_utf8_next_char (cur);

                        if (uc == parseoptions->stringindicator) {
                                /* A doubled indicator: skip the second one, append one below.  */
                                if (parseoptions->indicator_2x_is_single &&
                                    g_utf8_get_char (cur) == parseoptions->stringindicator)
                                        cur = g_utf8_next_char (cur);
                                else {
                                        /* "field content"dropped-garbage,  */
                                        /* Closing quote: discard everything up to the next separator or line end.  */
                                        while (*cur && !compare_terminator (cur, parseoptions)) {
                                                char const *post = stf_parse_csv_is_separator
                                                        (cur, parseoptions->sep.chr, parseoptions->sep.str);
                                                if (post) {
                                                        cur = post;
                                                        saw_sep = TRUE;
                                                        break;
                                                }
                                                cur = g_utf8_next_char (cur);
                                        }
                                        break;
                                }
                        }

                        g_string_append_unichar (text, uc);
                }

                /* We silently allow a missing terminating quote.  */
        } else {
                /* Unquoted field.  */

                /* Copy characters until a separator, a line terminator or the end of data.  */
                while (*cur && !compare_terminator (cur, parseoptions)) {

                        char const *post = stf_parse_csv_is_separator
                                (cur, parseoptions->sep.chr, parseoptions->sep.str);
                        if (post) {
                                cur = post;
                                saw_sep = TRUE;
                                break;
                        }

                        g_string_append_unichar (text, g_utf8_get_char (cur));
                        cur = g_utf8_next_char (cur);
                }

                /* Right trimming shortens the GString itself, one character at a time.  */
                if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
                        while (text->len) {
                                const char *last = g_utf8_prev_char (text->str + text->len);
                                if (!g_unichar_isspace (g_utf8_get_char (last)))
                                        break;
                                g_string_truncate (text, last - text->str);
                        }
                }
        }

        src->position = cur;

        /* With "duplicates" on, the separators that follow the first one are swallowed too.  */
        if (saw_sep && parseoptions->sep.duplicates)
                stf_parse_eat_separators (src, parseoptions);

        return saw_sep ? STF_CELL_FIELD_SEP : STF_CELL_FIELD_NO_SEP;
}
 701
 702 /**
 703  * stf_parse_csv_line:
 704  *
 705  * This will parse one line from the current @src->position.
 706  * NOTE: The calling routine is responsible for freeing the result.
 707  *
 708  * returns : a GPtrArray of char*'s
 709  **/
 710 static GPtrArray *
 711 stf_parse_csv_line (Source_t *src, StfParseOptions_t *parseoptions)
 712 {
 713         GPtrArray *line;
 714         gboolean cont = FALSE;
 715         GString *text;
 716
 717         g_return_val_if_fail (src != NULL, NULL);
 718         g_return_val_if_fail (parseoptions != NULL, NULL);
 719
 720         line = g_ptr_array_new ();
 721         if (parseoptions->trim_seps)
 722                 stf_parse_eat_separators (src, parseoptions);
 723
 724         text = g_string_sized_new (30);
 725
 726         while (1) {
 727                 char *ctext;
 728                 StfParseCellRes res =
 729                         stf_parse_csv_cell (text, src, parseoptions);
 730                 trim_spaces_inplace (text->str, parseoptions);
 731                 ctext = g_string_chunk_insert_len (src->chunk,
 732                                                    text->str, text->len);
 733                 g_string_truncate (text, 0);
 734
 735                 switch (res) {
 736                 case STF_CELL_FIELD_NO_SEP:
 737                         g_ptr_array_add (line, ctext);
 738                         cont = FALSE;
 739                         break;
 740
 741                 case STF_CELL_FIELD_SEP:
 742                         g_ptr_array_add (line, ctext);
 743                         cont = TRUE;  /* Make sure we see one more field.  */
 744                         break;
 745
 746                 default:
 747                         if (cont)
 748                                 g_ptr_array_add (line, ctext);
 749                         g_string_free (text, TRUE);
 750                         return line;
 751                 }
 752         }
 753 }
 754
 755 /**
 756  * stf_parse_fixed_cell:
 757  *
 758  * returns a pointer to the parsed cell contents.
 759  **/
 760 static char *
 761 stf_parse_fixed_cell (Source_t *src, StfParseOptions_t *parseoptions)
 762 {
 763         char *res;
 764         char const *cur;
 765         int splitval;
 766
 767         g_return_val_if_fail (src != NULL, NULL);
 768         g_return_val_if_fail (parseoptions != NULL, NULL);
 769
 770         cur = src->position;
 771
 772         if (src->splitpos < my_garray_len (parseoptions->splitpositions))
 773                 splitval = (int) g_array_index (parseoptions->splitpositions, int, src->splitpos);
 774         else
 775                 splitval = -1;
 776
 777         while (*cur != 0 && !compare_terminator (cur, parseoptions) && splitval != src->linepos) {
 778                 src->linepos++;
 779                 cur = g_utf8_next_char (cur);
 780         }
 781
 782         res = g_string_chunk_insert_len (src->chunk,
 783                                          src->position,
 784                                          cur - src->position);
 785
 786         src->position = cur;
 787
 788         return res;
 789 }
 790
 791 /**
 792  * stf_parse_fixed_line:
 793  *
 794  * This will parse one line from the current @src->position.
 795  * It will return a GPtrArray with the cell contents as strings.
 796
 797  * NOTE: The calling routine is responsible for freeing result.
 798  **/
 799 static GPtrArray *
 800 stf_parse_fixed_line (Source_t *src, StfParseOptions_t *parseoptions)
 801 {
 802         GPtrArray *line;
 803
 804         g_return_val_if_fail (src != NULL, NULL);
 805         g_return_val_if_fail (parseoptions != NULL, NULL);
 806
 807         src->linepos = 0;
 808         src->splitpos = 0;
 809
 810         line = g_ptr_array_new ();
 811         while (*src->position != '\0' && !compare_terminator (src->position, parseoptions)) {
 812                 char *field = stf_parse_fixed_cell (src, parseoptions);
 813
 814                 trim_spaces_inplace (field, parseoptions);
 815                 g_ptr_array_add (line, field);
 816
 817                 src->splitpos++;
 818         }
 819
 820         while (line->len < parseoptions->splitpositions->len)
 821                 g_ptr_array_add (line, g_strdup (""));
 822
 823         return line;
 824 }
 825
 826 /**
 827  * stf_parse_general_free: (skip)
 828  */
 829 void
 830 stf_parse_general_free (GPtrArray *lines)
 831 {
 832         unsigned lineno;
 833         for (lineno = 0; lineno < lines->len; lineno++) {
 834                 GPtrArray *line = g_ptr_array_index (lines, lineno);
 835                 /* Fields are not freed here.  */
 836                 if (line)
 837                         g_ptr_array_free (line, TRUE);
 838         }
 839         g_ptr_array_free (lines, TRUE);
 840 }
 841
 842
 843 /**
 844  * stf_parse_general: (skip)
 845  *
 846  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 847  * GPtrArray of strings.
 848  *
 849  * The caller must free this entire structure, for example by calling
 850  * stf_parse_general_free.
 851  **/
 852 GPtrArray *
 853 stf_parse_general (StfParseOptions_t *parseoptions,
 854                    GStringChunk *lines_chunk,
 855                    char const *data, char const *data_end)
 856 {
 857         GPtrArray *lines;
 858         Source_t src;
 859         int row;
 860         char const *valid_end = data_end;
 861
 862         g_return_val_if_fail (parseoptions != NULL, NULL);
 863         g_return_val_if_fail (data != NULL, NULL);
 864         g_return_val_if_fail (data_end != NULL, NULL);
 865         g_return_val_if_fail (stf_parse_options_valid (parseoptions), NULL);
 866         g_return_val_if_fail (g_utf8_validate (data, data_end-data, &valid_end), NULL);
 867
 868         src.chunk = lines_chunk;
 869         src.position = data;
 870         row = 0;
 871
 872         if ((data_end-data >= 3) && !strncmp(src.position, "\xEF\xBB\xBF", 3)) {
 873                 /* Skip over byte-order mark */
 874                 src.position += 3;
 875         }
 876
 877         lines = g_ptr_array_new ();
 878         while (*src.position != '\0' && src.position < data_end) {
 879                 GPtrArray *line;
 880
 881                 if (row == GNM_MAX_ROWS) {
 882                         parseoptions->rows_exceeded = TRUE;
 883                         break;
 884                 }
 885
 886                 line = parseoptions->parsetype == PARSE_TYPE_CSV
 887                         ? stf_parse_csv_line (&src, parseoptions)
 888                         : stf_parse_fixed_line (&src, parseoptions);
 889
 890                 g_ptr_array_add (lines, line);
 891                 if (parseoptions->parsetype != PARSE_TYPE_CSV)
 892                         src.position += compare_terminator (src.position, parseoptions);
 893                 row++;
 894         }
 895
 896         return lines;
 897 }
 898
 899 /**
 900  * stf_parse_lines: (skip)
 901  * @parseoptions: #StfParseOptions_t
 902  * @lines_chunk:
 903  * @data:
 904  * @maxlines:
 905  * @with_lineno:
 906  *
 907  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 908  * GPtrArray of strings.
 909  *
 910  * The caller must free this entire structure, for example by calling
 911  * stf_parse_general_free.
 912  **/
 913 GPtrArray *
 914 stf_parse_lines (StfParseOptions_t *parseoptions,
 915                  GStringChunk *lines_chunk,
 916                  char const *data,
 917                  int maxlines, gboolean with_lineno)
 918 {
 919         GPtrArray *lines;
 920         int lineno = 1;
 921
 922         g_return_val_if_fail (data != NULL, NULL);
 923
 924         lines = g_ptr_array_new ();
 925         while (*data) {
 926                 char const *data0 = data;
 927                 GPtrArray *line = g_ptr_array_new ();
 928
 929                 if (with_lineno) {
 930                         char buf[4 * sizeof (int)];
 931                         sprintf (buf, "%d", lineno);
 932                         g_ptr_array_add (line,
 933                                          g_string_chunk_insert (lines_chunk, buf));
 934                 }
 935
 936                 while (1) {
 937                         int termlen = compare_terminator (data, parseoptions);
 938                         if (termlen > 0 || *data == 0) {
 939                                 g_ptr_array_add (line,
 940                                                  g_string_chunk_insert_len (lines_chunk,
 941                                                                             data0,
 942                                                                             data - data0));
 943                                 data += termlen;
 944                                 break;
 945                         } else
 946                                 data = g_utf8_next_char (data);
 947                 }
 948
 949                 g_ptr_array_add (lines, line);
 950
 951                 lineno++;
 952                 if (lineno >= maxlines)
 953                         break;
 954         }
 955         return lines;
 956 }
 957
 958 char const *
 959 stf_parse_find_line (StfParseOptions_t *parseoptions,
 960                      char const *data,
 961                      int line)
 962 {
 963         while (line > 0) {
 964                 int termlen = compare_terminator (data, parseoptions);
 965                 if (termlen > 0) {
 966                         data += termlen;
 967                         line--;
 968                 } else if (*data == 0) {
 969                         return data;
 970                 } else {
 971                         data = g_utf8_next_char (data);
 972                 }
 973         }
 974         return data;
 975 }
 976
 977
 978 /**
 979  * stf_parse_options_fixed_autodiscover:
 980  * @parseoptions: a Parse options struct.
 981  * @data: The actual data.
 982  * @data_end: data end.
 983  *
 984  * Automatically try to discover columns in the text to be parsed.
 985  * We ignore empty lines (only containing parseoptions->terminator)
 986  *
 987  * FIXME: This is so extremely ugly that I am too tired to rewrite it right now.
 988  *        Think hard of a better more flexible solution...
 989  **/
 990 void
 991 stf_parse_options_fixed_autodiscover (StfParseOptions_t *parseoptions,
 992                                       char const *data, char const *data_end)
 993 {
 994         char const *iterator = data;
 995         GSList *list = NULL;
 996         GSList *list_start = NULL;
 997         int lines = 0;
 998         int effective_lines = 0;
 999         int max_line_length = 0;
1000         int *line_begin_hits = NULL;
1001         int *line_end_hits = NULL;
1002         int i;
1003
1004         stf_parse_options_fixed_splitpositions_clear (parseoptions);
1005
1006         /*
1007          * First take a look at all possible white space combinations
1008          */
1009         while (*iterator && iterator < data_end) {
1010                 gboolean begin_recorded = FALSE;
1011                 AutoDiscovery_t *disc = NULL;
1012                 int position = 0;
1013                 int termlen = 0;
1014
1015                 while (*iterator && (termlen = compare_terminator (iterator, parseoptions)) == 0) {
1016                         if (!begin_recorded && *iterator == ' ') {
1017                                 disc = g_new0 (AutoDiscovery_t, 1);
1018
1019                                 disc->start = position;
1020
1021                                 begin_recorded = TRUE;
1022                         } else if (begin_recorded && *iterator != ' ') {
1023                                 disc->stop = position;
1024                                 list = g_slist_prepend (list, disc);
1025
1026                                 begin_recorded = FALSE;
1027                                 disc = NULL;
1028                         }
1029
1030                         position++;
1031                         iterator++;
1032                 }
1033
1034                 if (position > max_line_length)
1035                         max_line_length = position;
1036
1037                 /*
1038                  * If there are excess spaces at the end of
1039                  * the line : ignore them
1040                  */
1041                 g_free (disc);
1042
1043                 /*
1044                  * Hop over the terminator
1045                  */
1046                 iterator += termlen;
1047
1048                 if (position != 0)
1049                         effective_lines++;
1050
1051                 lines++;
1052         }
1053
1054         list       = g_slist_reverse (list);
1055         list_start = list;
1056
1057         /*
1058          * Kewl stuff:
1059          * Look at the number of hits at each line position
1060          * if the number of hits equals the number of lines
1061          * we can be pretty sure this is the start or end
1062          * of a column, we filter out empty columns
1063          * later
1064          */
1065         line_begin_hits = g_new0 (int, max_line_length + 1);
1066         line_end_hits   = g_new0 (int, max_line_length + 1);
1067
1068         while (list) {
1069                 AutoDiscovery_t *disc = list->data;
1070
1071                 line_begin_hits[disc->start]++;
1072                 line_end_hits[disc->stop]++;
1073
1074                 g_free (disc);
1075
1076                 list = g_slist_next (list);
1077         }
1078         g_slist_free (list_start);
1079
1080         for (i = 0; i < max_line_length + 1; i++)
1081                 if (line_begin_hits[i] == effective_lines || line_end_hits[i] == effective_lines)
1082                         stf_parse_options_fixed_splitpositions_add (parseoptions, i);
1083
1084         /*
1085          * Do some corrections to the initial columns
1086          * detected here, we obviously don't need to
1087          * do this if there are no columns at all.
1088          */
1089         if (my_garray_len (parseoptions->splitpositions) > 0) {
1090                 /*
1091                  * Try to find columns that look like:
1092                  *
1093                  * Example     100
1094                  * Example2      9
1095                  *
1096                  * (In other words : Columns with left & right justification with
1097                  *  a minimum of 2 spaces in the middle)
1098                  * Split these columns in 2
1099                  */
1100
1101                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1102                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1103                         int end   = g_array_index (parseoptions->splitpositions, int, i + 1);
1104                         int num_spaces   = -1;
1105                         int spaces_start = 0;
1106                         gboolean right_aligned = TRUE;
1107                         gboolean left_aligned  = TRUE;
1108                         gboolean has_2_spaces  = TRUE;
1109
1110                         iterator = data;
1111                         lines = 0;
1112                         while (*iterator && iterator < data_end) {
1113                                 gboolean trigger = FALSE;
1114                                 gboolean space_trigger = FALSE;
1115                                 int pos = 0;
1116
1117                                 num_spaces   = -1;
1118                                 spaces_start = 0;
1119                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1120                                         if (pos == begin) {
1121                                                 if (*iterator == ' ')
1122                                                         left_aligned = FALSE;
1123
1124                                                 trigger = TRUE;
1125                                         } else if (pos == end - 1) {
1126                                                 if (*iterator == ' ')
1127                                                         right_aligned = FALSE;
1128
1129                                                 trigger = FALSE;
1130                                         }
1131
1132                                         if (trigger || pos == end - 1) {
1133                                                 if (!space_trigger && *iterator == ' ') {
1134                                                         space_trigger = TRUE;
1135                                                         spaces_start = pos;
1136                                                 } else if (space_trigger && *iterator != ' ') {
1137                                                         space_trigger = FALSE;
1138                                                         num_spaces = pos - spaces_start;
1139                                                 }
1140                                         }
1141
1142                                         iterator++;
1143                                         pos++;
1144                                 }
1145
1146                                 if (num_spaces < 2)
1147                                         has_2_spaces = FALSE;
1148
1149                                 if (*iterator)
1150                                         iterator++;
1151
1152                                 lines++;
1153                         }
1154
1155                         /*
1156                          * If this column meets all the criteria
1157                          * split it into two at the last measured
1158                          * spaces_start + num_spaces
1159                          */
1160                         if (has_2_spaces && right_aligned && left_aligned) {
1161                                 int val = (((spaces_start + num_spaces) - spaces_start) / 2) + spaces_start;
1162
1163                                 g_array_insert_val (parseoptions->splitpositions, i + 1, val);
1164
1165                                 /*
1166                                  * Skip over the inserted column
1167                                  */
1168                                 i++;
1169                         }
1170                 }
1171
1172                 /*
1173                  * Remove empty columns here if needed
1174                  */
1175                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1176                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1177                         int end = g_array_index (parseoptions->splitpositions, int, i + 1);
1178                         gboolean only_spaces = TRUE;
1179
1180                         iterator = data;
1181                         lines = 0;
1182                         while (*iterator && iterator < data_end) {
1183                                 gboolean trigger = FALSE;
1184                                 int pos = 0;
1185
1186                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1187                                         if (pos == begin)
1188                                                 trigger = TRUE;
1189                                         else if (pos == end)
1190                                                 trigger = FALSE;
1191
1192                                         if (trigger) {
1193                                                 if (*iterator != ' ')
1194                                                         only_spaces = FALSE;
1195                                         }
1196
1197                                         iterator++;
1198                                         pos++;
1199                                 }
1200
1201                                 if (*iterator)
1202                                         iterator++;
1203
1204                                 lines++;
1205                         }
1206
1207                         /*
1208                          * The column only contains spaces
1209                          * remove it
1210                          */
1211                         if (only_spaces) {
1212                                 g_array_remove_index (parseoptions->splitpositions, i);
1213
1214                                 /*
1215                                  * We HAVE to make sure that the next column (end) also
1216                                  * gets checked out. If we don't decrease "i" here, we
1217                                  * will skip over it as the indexes shift down after
1218                                  * the removal
1219                                  */
1220                                 i--;
1221                         }
1222                 }
1223         }
1224
1225         g_free (line_begin_hits);
1226         g_free (line_end_hits);
1227 }
1228
1229 /*******************************************************************************************************
1230  * STF PARSE HL: high-level functions that dump the raw data returned by the low-level parsing
1231  *               functions into something meaningful (== application specific)
1232  *******************************************************************************************************/
1233
1234 /*
1235  * This is more or less as gnm_cell_set_text, except...
1236  * 1. Unknown names are not allowed.
1237  * 2. Only '=' can start an expression.
1238  */
1239
1240 static void
1241 stf_cell_set_text (GnmCell *cell, char const *text)
1242 {
1243         GnmExprTop const *texpr;
1244         GnmValue *val;
1245         GOFormat const *fmt = gnm_style_get_format (gnm_cell_get_style (cell));
1246         const GODateConventions *date_conv = sheet_date_conv (cell->base.sheet);
1247
1248         if (!go_format_is_text (fmt) && *text == '=' && text[1] != 0) {
1249                 GnmExprParseFlags flags =
1250                         GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID;
1251                 const char *expr_start = text + 1;
1252                 GnmParsePos pos;
1253                 val = NULL;
1254                 parse_pos_init_cell (&pos, cell);
1255                 texpr = gnm_expr_parse_str (expr_start, &pos, flags,
1256                                             NULL, NULL);
1257         } else {
1258                 texpr = NULL;
1259                 val = format_match (text, fmt, date_conv);
1260         }
1261
1262         if (!val && !texpr)
1263                 val = value_new_string (text);
1264
1265         if (val)
1266                 gnm_cell_set_value (cell, val);
1267         else {
1268                 gnm_cell_set_expr (cell, texpr);
1269                 gnm_expr_top_unref (texpr);
1270         }
1271 }
1272
1273 static void
1274 stf_read_remember_settings (Workbook *book, StfParseOptions_t *po)
1275 {
1276         if (po->parsetype == PARSE_TYPE_CSV) {
1277                 GnmStfExport *stfe = gnm_stf_get_stfe (G_OBJECT (book));
1278                 char quote[6];
1279                 int length = g_unichar_to_utf8 (po->stringindicator, quote);
1280                 if (length > 5) {
1281                         quote[0] = '"';
1282                         quote[1] = '\0';
1283                 } else quote[length] = '\0';
1284
1285                 g_object_set (G_OBJECT (stfe), "separator", po->sep.chr, "quote", &quote, NULL);
1286
1287                 if ((po->terminator != NULL) &&  (po->terminator->data != NULL))
1288                         g_object_set (G_OBJECT (stfe), "eol", po->terminator->data, NULL);
1289         }
1290 }
1291
1292 gboolean
1293 stf_parse_sheet (StfParseOptions_t *parseoptions,
1294                  char const *data, char const *data_end,
1295                  Sheet *sheet, int start_col, int start_row)
1296 {
1297         int row;
1298         unsigned int lrow;
1299         GStringChunk *lines_chunk;
1300         GPtrArray *lines;
1301         gboolean result = TRUE;
1302         int col;
1303         unsigned int lcol;
1304         size_t nformats;
1305
1306         SETUP_LOCALE_SWITCH;
1307
1308         g_return_val_if_fail (parseoptions != NULL, FALSE);
1309         g_return_val_if_fail (data != NULL, FALSE);
1310         g_return_val_if_fail (IS_SHEET (sheet), FALSE);
1311
1312         if (!data_end)
1313                 data_end = data + strlen (data);
1314
1315         lines_chunk = g_string_chunk_new (100 * 1024);
1316         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1317         if (lines == NULL)
1318                 result = FALSE;
1319
1320         col = start_col;
1321         nformats = parseoptions->formats->len;
1322         for (lcol = 0; lcol < nformats; lcol++) {
1323                 GOFormat const *fmt = g_ptr_array_index (parseoptions->formats, lcol);
1324                 GnmStyle *mstyle;
1325                 gboolean want_col =
1326                         (parseoptions->col_import_array == NULL ||
1327                          parseoptions->col_import_array_len <= lcol ||
1328                          parseoptions->col_import_array[lcol]);
1329                 if (!want_col || col >= gnm_sheet_get_max_cols (sheet))
1330                         continue;
1331
1332                 if (fmt && !go_format_is_general (fmt)) {
1333                         GnmRange r;
1334                         int end_row = MIN (start_row + (int)lines->len - 1,
1335                                            gnm_sheet_get_last_row (sheet));
1336
1337                         range_init (&r, col, start_row, col, end_row);
1338                         mstyle = gnm_style_new ();
1339                         gnm_style_set_format (mstyle, fmt);
1340                         sheet_apply_style (sheet, &r, mstyle);
1341                 }
1342                 col++;
1343         }
1344
1345         START_LOCALE_SWITCH;
1346         for (row = start_row, lrow = 0;
1347              result && lrow < lines->len;
1348              row++, lrow++) {
1349                 GPtrArray *line;
1350
1351                 if (row >= gnm_sheet_get_max_rows (sheet)) {
1352                         if (!parseoptions->rows_exceeded) {
1353                                 /* FIXME: What locale?  */
1354                                 g_warning (_("There are more rows of data than "
1355                                              "there is room for in the sheet.  Extra "
1356                                              "rows will be ignored."));
1357                                 parseoptions->rows_exceeded = TRUE;
1358                         }
1359                         break;
1360                 }
1361
1362                 col = start_col;
1363                 line = g_ptr_array_index (lines, lrow);
1364
1365                 for (lcol = 0; lcol < line->len; lcol++) {
1366                         GOFormat const *fmt = lcol < nformats
1367                                 ? g_ptr_array_index (parseoptions->formats, lcol)
1368                                 : go_format_general ();
1369                         char const *text = g_ptr_array_index (line, lcol);
1370                         gboolean want_col =
1371                                 (parseoptions->col_import_array == NULL ||
1372                                  parseoptions->col_import_array_len <= lcol ||
1373                                  parseoptions->col_import_array[lcol]);
1374                         if (!want_col)
1375                                 continue;
1376
1377                         if (col >= gnm_sheet_get_max_cols (sheet)) {
1378                                 if (!parseoptions->cols_exceeded) {
1379                                         /* FIXME: What locale?  */
1380                                         g_warning (_("There are more columns of data than "
1381                                                      "there is room for in the sheet.  Extra "
1382                                                      "columns will be ignored."));
1383                                         parseoptions->cols_exceeded = TRUE;
1384                                 }
1385                                 break;
1386                         }
1387                         if (text && *text) {
1388                                 GnmCell *cell = sheet_cell_fetch (sheet, col, row);
1389                                 if (!go_format_is_text (fmt) &&
1390                                     lcol < parseoptions->formats_decimal->len &&
1391                                     g_ptr_array_index (parseoptions->formats_decimal, lcol)) {
1392                                         GOFormatFamily fam;
1393                                         GnmValue *v = format_match_decimal_number_with_locale
1394                                                 (text, &fam,
1395                                                  g_ptr_array_index (parseoptions->formats_curr, lcol),
1396                                                  g_ptr_array_index (parseoptions->formats_thousand, lcol),
1397                                                  g_ptr_array_index (parseoptions->formats_decimal, lcol));
1398                                         if (!v)
1399                                                 v = value_new_string (text);
1400                                         sheet_cell_set_value (cell, v);
1401                                 } else {
1402
1403                                         stf_cell_set_text (cell, text);
1404                                 }
1405                         }
1406                         col++;
1407                 }
1408
1409                 g_ptr_array_index (lines, lrow) = NULL;
1410                 g_ptr_array_free (line, TRUE);
1411         }
1412         END_LOCALE_SWITCH;
1413
1414         for (lcol = 0, col = start_col;
1415              lcol < parseoptions->col_import_array_len  && col < gnm_sheet_get_max_cols (sheet);
1416              lcol++) {
1417                 if (parseoptions->col_import_array == NULL ||
1418                     parseoptions->col_import_array_len <= lcol ||
1419                     parseoptions->col_import_array[lcol]) {
1420                         if (parseoptions->col_autofit_array == NULL ||
1421                             parseoptions->col_autofit_array[lcol]) {
1422                                 ColRowIndexList *list = colrow_get_index_list (col, col, NULL);
1423                                 ColRowStateGroup  *state = colrow_set_sizes (sheet, TRUE, list, -1, 0, -1);
1424                                 colrow_index_list_destroy (list);
1425                                 g_slist_free (state);
1426                         }
1427                         col++;
1428                 }
1429         }
1430
1431         g_string_chunk_free (lines_chunk);
1432         if (lines)
1433                 stf_parse_general_free (lines);
1434         if (result)
1435                 stf_read_remember_settings (sheet->workbook, parseoptions);
1436         return result;
1437 }
1438
1439 GnmCellRegion *
1440 stf_parse_region (StfParseOptions_t *parseoptions, char const *data, char const *data_end,
1441                   Workbook const *wb)
1442 {
1443         static GODateConventions const default_conv = {FALSE};
1444         GODateConventions const *date_conv = wb ? workbook_date_conv (wb) : &default_conv;
1445
1446         GnmCellRegion *cr;
1447         unsigned int row, colhigh = 0;
1448         GStringChunk *lines_chunk;
1449         GPtrArray *lines;
1450         size_t nformats;
1451
1452         SETUP_LOCALE_SWITCH;
1453
1454         g_return_val_if_fail (parseoptions != NULL, NULL);
1455         g_return_val_if_fail (data != NULL, NULL);
1456
1457         START_LOCALE_SWITCH;
1458
1459         cr = gnm_cell_region_new (NULL);
1460
1461         if (!data_end)
1462                 data_end = data + strlen (data);
1463         lines_chunk = g_string_chunk_new (100 * 1024);
1464         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1465         nformats = parseoptions->formats->len;
1466         for (row = 0; row < lines->len; row++) {
1467                 GPtrArray *line = g_ptr_array_index (lines, row);
1468                 unsigned int col, targetcol = 0;
1469                 for (col = 0; col < line->len; col++) {
1470                         if (parseoptions->col_import_array == NULL ||
1471                             parseoptions->col_import_array_len <= col ||
1472                             parseoptions->col_import_array[col]) {
1473                                 const char *text = g_ptr_array_index (line, col);
1474                                 if (text) {
1475                                         GOFormat *fmt = NULL;
1476                                         GnmValue *v;
1477                                         GnmCellCopy *cc;
1478
1479                                         if (col < nformats)
1480                                                 fmt = g_ptr_array_index (parseoptions->formats, col);
1481                                         v = format_match (text, fmt, date_conv);
1482                                         if (!v)
1483                                                 v = value_new_string (text);
1484
1485                                         cc = gnm_cell_copy_new (cr, targetcol, row);
1486                                         cc->val  = v;
1487                                         cc->texpr = NULL;
1488                                         targetcol++;
1489                                         if (targetcol > colhigh)
1490                                                 colhigh = targetcol;
1491                                 }
1492                         }
1493                 }
1494         }
1495         stf_parse_general_free (lines);
1496         g_string_chunk_free (lines_chunk);
1497
1498         END_LOCALE_SWITCH;
1499
1500         cr->cols    = (colhigh > 0) ? colhigh : 1;
1501         cr->rows    = row;
1502
1503         return cr;
1504 }
1505
/*
 * qsort() comparison callback ordering ints ascending.
 *
 * Returns a negative, zero or positive value when *a is respectively
 * less than, equal to or greater than *b.  The result is built from
 * comparisons rather than a subtraction so that operands far apart
 * cannot overflow.
 */
static int
int_sort (void const *a, void const *b)
{
        int const ia = *(int const *)a;
        int const ib = *(int const *)b;

        return (ia > ib) - (ia < ib);
}
1511
1512 static int
1513 count_character (GPtrArray *lines, gunichar c, double quantile)
1514 {
1515         int *counts, res;
1516         unsigned int lno, cno;
1517
1518         if (lines->len == 0)
1519                 return 0;
1520
1521         counts = g_new (int, lines->len);
1522         for (lno = cno = 0; lno < lines->len; lno++) {
1523                 int count = 0;
1524                 GPtrArray *boxline = g_ptr_array_index (lines, lno);
1525                 char const *line = g_ptr_array_index (boxline, 0);
1526
1527                 /* Ignore empty lines.  */
1528                 if (*line == 0)
1529                         continue;
1530
1531                 while (*line) {
1532                         if (g_utf8_get_char (line) == c)
1533                                 count++;
1534                         line = g_utf8_next_char (line);
1535                 }
1536
1537                 counts[cno++] = count;
1538         }
1539
1540         if (cno == 0)
1541                 res = 0;
1542         else {
1543                 unsigned int qi = (unsigned int)ceil (quantile * cno);
1544                 qsort (counts, cno, sizeof (counts[0]), int_sort);
1545                 if (qi == cno)
1546                         qi--;
1547                 res = counts[qi];
1548         }
1549
1550         g_free (counts);
1551
1552         return res;
1553 }
1554
1555 static void
1556 dump_guessed_options (const StfParseOptions_t *res)
1557 {
1558         GSList *l;
1559         char ubuffer[6 + 1];
1560         unsigned ui;
1561
1562         g_printerr ("Guessed format:\n");
1563         switch (res->parsetype) {
1564         case PARSE_TYPE_CSV:
1565                 g_printerr ("  type = sep\n");
1566                 g_printerr ("  separator = %s\n",
1567                             res->sep.chr ? res->sep.chr : "(none)");
1568                 g_printerr ("    see two as one = %s\n",
1569                             res->sep.duplicates ? "yes" : "no");
1570                 break;
1571         case PARSE_TYPE_FIXED:
1572                 g_printerr ("  type = sep\n");
1573                 break;
1574         default:
1575                 ;
1576         }
1577         g_printerr ("  trim space = %d\n", res->trim_spaces);
1578
1579         ubuffer[g_unichar_to_utf8 (res->stringindicator, ubuffer)] = 0;
1580         g_printerr ("  string indicator = %s\n", ubuffer);
1581         g_printerr ("    see two as one = %s\n",
1582                     res->indicator_2x_is_single ? "yes" : "no");
1583
1584         g_printerr ("  line terminators =");
1585         for (l = res->terminator; l; l = l->next) {
1586                 const char *t = l->data;
1587                 if (strcmp (t, "\n") == 0)
1588                         g_printerr (" unix");
1589                 else if (strcmp (t, "\r") == 0)
1590                         g_printerr (" mac");
1591                 else if (strcmp (t, "\r\n") == 0)
1592                         g_printerr (" dos");
1593                 else
1594                         g_printerr (" other");
1595         }
1596         g_printerr ("\n");
1597
1598         for (ui = 0; ui < res->formats->len; ui++) {
1599                 GOFormat const *fmt = g_ptr_array_index (res->formats, ui);
1600                 const GString *decimal = ui < res->formats_decimal->len
1601                         ? g_ptr_array_index (res->formats_decimal, ui)
1602                         : NULL;
1603                 const GString *thousand = ui < res->formats_thousand->len
1604                         ? g_ptr_array_index (res->formats_thousand, ui)
1605                         : NULL;
1606
1607                 g_printerr ("  fmt.%d = %s\n", ui, go_format_as_XL (fmt));
1608                 if (decimal)
1609                         g_printerr ("  fmt.%d.dec = %s\n", ui, decimal->str);
1610                 if (thousand)
1611                         g_printerr ("  fmt.%d.thou = %s\n", ui, thousand->str);
1612         }
1613 }
1614
1615 /**
1616  * stf_parse_options_guess:
1617  * @data: the input data.
1618  *
1619  * Returns: (transfer full): the guessed options.
1620  **/
1621 StfParseOptions_t *
1622 stf_parse_options_guess (char const *data)
1623 {
1624         StfParseOptions_t *res;
1625         GStringChunk *lines_chunk;
1626         GPtrArray *lines;
1627         int tabcount;
1628         int sepcount;
1629         gunichar sepchar = go_locale_get_arg_sep ();
1630
1631         g_return_val_if_fail (data != NULL, NULL);
1632
1633         res = stf_parse_options_new ();
1634         lines_chunk = g_string_chunk_new (100 * 1024);
1635         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1636
1637         tabcount = count_character (lines, '\t', 0.2);
1638         sepcount = count_character (lines, sepchar, 0.2);
1639
1640         /* At least one tab per line and enough to separate every
1641            would-be sepchars.  */
1642         if (tabcount >= 1 && tabcount >= sepcount - 1)
1643                 stf_parse_options_csv_set_separators (res, "\t", NULL);
1644         else {
1645                 gunichar c;
1646
1647                 /*
1648                  * Try a few more or less likely characters and pick the first
1649                  * one that occurs on at least half the lines.
1650                  *
1651                  * The order is mostly random, although ' ' and '!' which
1652                  * could very easily occur in text are put last.
1653                  */
1654                 if (count_character (lines, (c = sepchar), 0.5) > 0 ||
1655                     count_character (lines, (c = go_locale_get_col_sep ()), 0.5) > 0 ||
1656                     count_character (lines, (c = ':'), 0.5) > 0 ||
1657                     count_character (lines, (c = ','), 0.5) > 0 ||
1658                     count_character (lines, (c = ';'), 0.5) > 0 ||
1659                     count_character (lines, (c = '|'), 0.5) > 0 ||
1660                     count_character (lines, (c = '!'), 0.5) > 0 ||
1661                     count_character (lines, (c = ' '), 0.5) > 0) {
1662                         char sep[7];
1663                         sep[g_unichar_to_utf8 (c, sep)] = 0;
1664                         if (c == ' ')
1665                                 strcat (sep, "\t");
1666                         stf_parse_options_csv_set_separators (res, sep, NULL);
1667                 }
1668         }
1669
1670         // For now, always separated:
1671         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1672
1673         switch (res->parsetype) {
1674         case PARSE_TYPE_CSV: {
1675                 gboolean dups =
1676                         res->sep.chr &&
1677                         strchr (res->sep.chr, ' ') != NULL;
1678                 gboolean trim =
1679                         res->sep.chr &&
1680                         strchr (res->sep.chr, ' ') != NULL;
1681
1682                 stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1683                 stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1684                 stf_parse_options_csv_set_duplicates (res, dups);
1685                 stf_parse_options_csv_set_trim_seps (res, trim);
1686
1687                 stf_parse_options_csv_set_stringindicator (res, '"');
1688                 break;
1689         }
1690
1691         case PARSE_TYPE_FIXED:
1692                 break;
1693
1694         default:
1695                 g_assert_not_reached ();
1696         }
1697
1698         stf_parse_general_free (lines);
1699         g_string_chunk_free (lines_chunk);
1700
1701         stf_parse_options_guess_formats (res, data);
1702
1703         if (gnm_debug_flag ("stf"))
1704                 dump_guessed_options (res);
1705
1706         return res;
1707 }
1708
1709 /**
1710  * stf_parse_options_guess_csv:
1711  * @data: the CSV input data.
1712  *
1713  * Returns: (transfer full): the guessed options.
1714  **/
1715 StfParseOptions_t *
1716 stf_parse_options_guess_csv (char const *data)
1717 {
1718         StfParseOptions_t *res;
1719         GStringChunk *lines_chunk;
1720         GPtrArray *lines;
1721         char *sep = NULL;
1722         char const *quoteline = NULL;
1723         int pass;
1724         gunichar stringind = '"';
1725
1726         g_return_val_if_fail (data != NULL, NULL);
1727
1728         res = stf_parse_options_new ();
1729         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1730         stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1731         stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1732         stf_parse_options_csv_set_duplicates (res, FALSE);
1733         stf_parse_options_csv_set_trim_seps (res, FALSE);
1734         stf_parse_options_csv_set_stringindicator (res, stringind);
1735
1736         lines_chunk = g_string_chunk_new (100 * 1024);
1737         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1738
1739         /*
1740          * Find a line containing a quote; skip first line unless it is
1741          * the only one.  Prefer a line with the quote first.
1742          */
1743         for (pass = 1; !quoteline && pass <= 2; pass++) {
1744                 size_t lno;
1745                 for (lno = MIN (1, lines->len - 1);
1746                      !quoteline && lno < lines->len;
1747                      lno++) {
1748                         GPtrArray *boxline = g_ptr_array_index (lines, lno);
1749                         const char *line = g_ptr_array_index (boxline, 0);
1750                         switch (pass) {
1751                         case 1:
1752                                 if (g_utf8_get_char (line) == stringind)
1753                                         quoteline = line;
1754                                 break;
1755                         case 2:
1756                                 if (my_utf8_strchr (line, stringind))
1757                                         quoteline = line;
1758                                 break;
1759                         }
1760                 }
1761         }
1762
1763         if (quoteline) {
1764                 const char *p0 = my_utf8_strchr (quoteline, stringind);
1765                 const char *p = p0;
1766
1767                 do {
1768                         p = g_utf8_next_char (p);
1769                 } while (*p && g_utf8_get_char (p) != stringind);
1770                 if (*p) p = g_utf8_next_char (p);
1771                 while (*p && g_unichar_isspace (g_utf8_get_char (p)))
1772                         p = g_utf8_next_char (p);
1773                 if (*p) {
1774                         /* Use the character after the quote.  */
1775                         sep = g_strndup (p, g_utf8_next_char (p) - p);
1776                 } else {
1777                         /* Try to use character before the quote.  */
1778                         while (p0 > quoteline && !sep) {
1779                                 p = p0;
1780                                 p0 = g_utf8_prev_char (p0);
1781                                 if (!g_unichar_isspace (g_utf8_get_char (p0)))
1782                                         sep = g_strndup (p0, p - p0);
1783                         }
1784                 }
1785         }
1786
1787         if (!sep)
1788                 sep = g_strdup (",");
1789         stf_parse_options_csv_set_separators (res, sep, NULL);
1790         g_free (sep);
1791
1792         stf_parse_general_free (lines);
1793         g_string_chunk_free (lines_chunk);
1794
1795         stf_parse_options_guess_formats (res, data);
1796
1797         if (gnm_debug_flag ("stf"))
1798                 dump_guessed_options (res);
1799
1800         return res;
1801 }
1802
/*
 * Bit flags for the interpretations still considered possible for a
 * column while guessing its format.
 */
typedef enum {
	/* Date orderings occupy the low bits.  */
	STF_GUESS_DATE_DMY = 1 << 0,
	STF_GUESS_DATE_MDY = 1 << 1,
	STF_GUESS_DATE_YMD = 1 << 2,

	/* Decimal separator conventions for numbers.  */
	STF_GUESS_NUMBER_DEC_POINT = 1 << 4,
	STF_GUESS_NUMBER_DEC_COMMA = 1 << 5,
	STF_GUESS_NUMBER_DEC_EITHER = (STF_GUESS_NUMBER_DEC_POINT |
				       STF_GUESS_NUMBER_DEC_COMMA),

	/* Every flag above.  */
	STF_GUESS_ALL = (STF_GUESS_DATE_DMY |
			 STF_GUESS_DATE_MDY |
			 STF_GUESS_DATE_YMD |
			 STF_GUESS_NUMBER_DEC_EITHER)
} StfGuessFormats;
1814
1815 static void
1816 do_check_date (const char *data, StfGuessFormats flag,
1817                gboolean mbd, gboolean ybm,
1818                unsigned *possible,
1819                GODateConventions const *date_conv)
1820 {
1821         GnmValue *v;
1822         gboolean this_mbd, this_ybm;
1823         int imbd;
1824
1825         if (!(*possible & flag))
1826                 return;
1827
1828         v = format_match_datetime (data, date_conv, mbd, TRUE, FALSE);
1829         if (!v || !VALUE_FMT (v))
1830                 goto fail;
1831
1832         imbd = go_format_month_before_day (VALUE_FMT (v));
1833         this_mbd = (imbd >= 1);
1834         this_ybm = (imbd == 2);
1835         if (mbd != this_mbd || ybm != this_ybm)
1836                 goto fail;
1837
1838         goto done;
1839
1840 fail:
1841         *possible &= ~flag;
1842 done:
1843         value_release (v);
1844 }
1845
1846
1847 static void
1848 do_check_number (const char *data, StfGuessFormats flag,
1849                  const GString *dec, const GString *thousand, const GString *curr,
1850                  unsigned *possible, int *decimals)
1851 {
1852         GnmValue *v;
1853         GOFormatFamily family;
1854         const char *pthou;
1855
1856         if (!(*possible & flag))
1857                 return;
1858
1859         v = format_match_decimal_number_with_locale (data, &family, curr, thousand, dec);
1860         if (!v)
1861                 goto fail;
1862
1863         if (*decimals != -2) {
1864                 const char *pdec = strstr (data, dec->str);
1865                 int this_decimals = 0;
1866                 if (pdec) {
1867                         pdec += dec->len;
1868                         while (g_ascii_isdigit (*pdec)) {
1869                                 pdec++;
1870                                 this_decimals++;
1871                         }
1872                 }
1873                 if (*decimals == -1)
1874                         *decimals = this_decimals;
1875                 else if (*decimals != this_decimals)
1876                         *decimals = -2;
1877         }
1878
1879         pthou = strstr (data, thousand->str);
1880         if (pthou) {
1881                 const char *p;
1882                 int digits = 0, nonzero_digits = 0;
1883                 for (p = data; p < pthou; p = g_utf8_next_char (p)) {
1884                         if (g_unichar_isdigit (g_utf8_get_char (p))) {
1885                                 digits++;
1886                                 if (*p != '0')
1887                                         nonzero_digits++;
1888                         }
1889                 }
1890                 // "-.222" implies that "." is not a thousands separator.
1891                 // "0.222" implies that "." is not a thousands separator.
1892                 // "12345,555" implies that "," is not a thousands separator.
1893                 if (nonzero_digits == 0 || digits > 3)
1894                         goto fail;
1895         }
1896
1897         goto done;
1898
1899 fail:
1900         *possible &= ~flag;
1901 done:
1902         value_release (v);
1903 }
1904
1905
1906 /**
1907  * stf_parse_options_guess_formats:
1908  * @data: the CSV input data.
1909  *
1910  * This function attempts to recognize data formats on a column-by-column
1911  * basis under the assumption that the data in a text file will generally
1912  * use the same data formats.
1913  *
1914  * This is useful because not all values give sufficient information by
1915  * themselves to tell what format the data is in.  For example, "1/2/2000"
1916  * is likely to be a date in year 2000, but it is not clear if it is in
1917  * January or February.  If another value in the same column is "31/1/1999"
1918  * then it is likely that the former date was in February.
1919  *
1920  * Likewise, a value of "123,456" could mean either 1.23456e5 or 1.23456e2.
1921  * A later value of "111,200.22" would clear up the confusion.
1922  *
1923  **/
1924 void
1925 stf_parse_options_guess_formats (StfParseOptions_t *po, char const *data)
1926 {
1927         GStringChunk *lines_chunk;
1928         GPtrArray *lines;
1929         unsigned lno, col, colcount, sline;
1930         GODateConventions const *date_conv = go_date_conv_from_str ("Lotus:1900");
1931         GString *s_comma = g_string_new (",");
1932         GString *s_dot = g_string_new (".");
1933         GString *s_dollar = g_string_new ("$");
1934         gboolean debug = gnm_debug_flag ("stf");
1935
1936         g_ptr_array_set_size (po->formats, 0);
1937         g_ptr_array_set_size (po->formats_decimal, 0);
1938         g_ptr_array_set_size (po->formats_thousand, 0);
1939         g_ptr_array_set_size (po->formats_curr, 0);
1940
1941         lines_chunk = g_string_chunk_new (100 * 1024);
1942         lines = stf_parse_general (po, lines_chunk, data, data + strlen (data));
1943
1944         colcount = 0;
1945         for (lno = 0; lno < lines->len; lno++) {
1946                 GPtrArray *line = g_ptr_array_index (lines, lno);
1947                 colcount = MAX (colcount, line->len);
1948         }
1949
1950         // Ignore first line unless it is the only one
1951         sline = MIN ((int)lines->len - 1, 1);
1952
1953         g_ptr_array_set_size (po->formats, colcount);
1954         g_ptr_array_set_size (po->formats_decimal, colcount);
1955         g_ptr_array_set_size (po->formats_thousand, colcount);
1956         g_ptr_array_set_size (po->formats_curr, colcount);
1957         for (col = 0; col < colcount; col++) {
1958                 unsigned possible = STF_GUESS_ALL;
1959                 GOFormat *fmt = NULL;
1960                 gboolean seen_dot = FALSE;
1961                 gboolean seen_comma = FALSE;
1962                 int decimals_if_point = -1; // -1: unset; -2: inconsistent; >=0: count
1963                 int decimals_if_comma = -1; // -1: unset; -2: inconsistent; >=0: count
1964
1965                 for (lno = sline; possible && lno < lines->len; lno++) {
1966                         GPtrArray *line = g_ptr_array_index (lines, lno);
1967                         const char *data = col < line->len ? g_ptr_array_index (line, col) : "";
1968                         unsigned prev_possible = possible;
1969
1970                         if (*data == 0 || data[0] == '\'')
1971                                 continue;
1972
1973                         do_check_date (data, STF_GUESS_DATE_DMY, FALSE, FALSE, &possible, date_conv);
1974                         do_check_date (data, STF_GUESS_DATE_MDY, TRUE, FALSE, &possible, date_conv);
1975                         do_check_date (data, STF_GUESS_DATE_YMD, TRUE, TRUE, &possible, date_conv);
1976
1977                         if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER) {
1978                                 const char *pdot = strstr (data, s_dot->str);
1979                                 const char *pcomma = strstr (data, s_comma->str);
1980                                 if (pdot && pcomma) {
1981                                         // Both -- last one is the decimal separator
1982                                         if (pdot > pcomma)
1983                                                 possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1984                                         else
1985                                                 possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1986                                 } else if (pdot && strstr (pdot + s_dot->len, s_dot->str)) {
1987                                         // Two dots so they are thousands separators
1988                                         possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1989                                 } else if (pcomma && strstr (pcomma + s_comma->len, s_comma->str)) {
1990                                         // Two commas so they are thousands separators
1991                                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1992                                 }
1993
1994                                 seen_dot = seen_dot || (pdot != 0);
1995                                 seen_comma = seen_comma || (pcomma != 0);
1996                         }
1997                         do_check_number (data, STF_GUESS_NUMBER_DEC_POINT,
1998                                          s_dot, s_comma, s_dollar,
1999                                          &possible, &decimals_if_point);
2000                         do_check_number (data, STF_GUESS_NUMBER_DEC_COMMA,
2001                                          s_comma, s_dot, s_dollar,
2002                                          &possible, &decimals_if_comma);
2003
2004                         if (possible != prev_possible && debug)
2005                                 g_printerr ("col=%d; after [%s] possible=0x%x\n", col, data, possible);
2006                 }
2007
2008                 if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER &&
2009                     !seen_dot && !seen_comma) {
2010                         // It doesn't matter what the separators are
2011                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
2012                 }
2013
2014                 switch (possible) {
2015                 case STF_GUESS_DATE_DMY:
2016                         fmt = go_format_new_from_XL ("d-mmm-yyyy");
2017                         break;
2018                 case STF_GUESS_DATE_MDY:
2019                         fmt = go_format_new_from_XL ("m/d/yyyy");
2020                         break;
2021                 case STF_GUESS_DATE_YMD:
2022                         fmt = go_format_new_from_XL ("yyyy-mm-dd");
2023                         break;
2024                 case STF_GUESS_NUMBER_DEC_POINT:
2025                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (".");
2026                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (",");
2027                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2028                         if (decimals_if_point > 0) {
2029                                 // Don't set format if decimals is zero
2030                                 GString *fmt_str = g_string_new (NULL);
2031                                 go_format_generate_number_str (fmt_str, 1, decimals_if_point, seen_comma, FALSE, FALSE, "", "");
2032                                 fmt = go_format_new_from_XL (fmt_str->str);
2033                                 g_string_free (fmt_str, TRUE);
2034                         }
2035                         break;
2036                 case STF_GUESS_NUMBER_DEC_COMMA:
2037                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (",");
2038                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (".");
2039                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2040                         if (decimals_if_comma > 0) {
2041                                 // Don't set format if decimals is zero
2042                                 GString *fmt_str = g_string_new (NULL);
2043                                 go_format_generate_number_str (fmt_str, 1, decimals_if_comma, seen_dot, FALSE, FALSE, "", "");
2044                                 fmt = go_format_new_from_XL (fmt_str->str);
2045                                 g_string_free (fmt_str, TRUE);
2046                         }
2047                         break;
2048                 default:
2049                         break;
2050                 }
2051
2052                 if (!fmt)
2053                         fmt = go_format_ref (go_format_general ());
2054                 g_ptr_array_index (po->formats, col) = fmt;
2055         }
2056
2057         stf_parse_general_free (lines);
2058         g_string_chunk_free (lines_chunk);
2059
2060         g_string_free (s_dot, TRUE);
2061         g_string_free (s_comma, TRUE);
2062         g_string_free (s_dollar, TRUE);
2063 }