src/stf-parse.c

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * stf-parse.c : Structured Text Format parser. (STF)
   4  *               A general purpose engine for parsing data
   5  *               in CSV and Fixed width format.
   6  *
   7  *
   8  * Copyright (C) Almer. S. Tigelaar.
   9  * EMail: almer1@dds.nl or almer-t@bigfoot.com
  10  *
  11  * Copyright (C) 2003 Andreas J. Guelzow <aguelzow@taliesin.ca>
  12  * Copyright (C) 2003,2008-2009 Morten Welinder <terra@gnome.org>
  13  *
  14  * This program is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU General Public License as published by
  16  * the Free Software Foundation; either version 2 of the License, or
  17  * (at your option) any later version.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  26  */
  27
  28 #include <gnumeric-config.h>
  29 #include <glib/gi18n-lib.h>
  30 #include "gnumeric.h"
  31 #include "stf-parse.h"
  32 #include "stf-export.h"
  33
  34 #include "workbook.h"
  35 #include "cell.h"
  36 #include "sheet.h"
  37 #include "expr.h"
  38 #include "clipboard.h"
  39 #include "sheet-style.h"
  40 #include "value.h"
  41 #include "mstyle.h"
  42 #include "number-match.h"
  43 #include "gutils.h"
  44 #include "parse-util.h"
  45 #include "number-match.h"
  46 #include "gnm-format.h"
  47 #include "ranges.h"
  48 #include <goffice/goffice.h>
  49
  50 #include <stdlib.h>
  51 #include <locale.h>
  52 #include <string.h>
  53
/*
 * Locale-switch helpers.  START_LOCALE_SWITCH expands to code that reads a
 * variable named parseoptions, and both START_ and END_LOCALE_SWITCH use
 * the oldlocale variable declared by SETUP_LOCALE_SWITCH, so all of these
 * must be in scope where the macros are used.
 */

/* Declares the variable holding the saved locale; NULL means nothing was saved. */
#define SETUP_LOCALE_SWITCH char *oldlocale = NULL

/*
 * When parseoptions->locale is set: keep a copy of what
 * go_setlocale (LC_ALL, NULL) returns (presumably the current locale name)
 * in oldlocale, then switch LC_ALL to parseoptions->locale.
 */
#define START_LOCALE_SWITCH if (parseoptions->locale) {\
oldlocale = g_strdup(go_setlocale (LC_ALL, NULL)); \
go_setlocale(LC_ALL, parseoptions->locale);}

/* When a locale was saved: switch LC_ALL back to it and free the saved copy. */
#define END_LOCALE_SWITCH if (oldlocale) {\
go_setlocale(LC_ALL, oldlocale);\
g_free (oldlocale);}
  63
/*
 * Source_t struct, used for interchanging parsing information between the
 * low level parse functions.
 */
typedef struct {
        GStringChunk *chunk;   /* String chunk that parsed field text is copied into */
        char const *position;  /* Indicates the current position within data */

        /* Used internally for fixed width parsing */
        int splitpos;          /* Indicates current position in splitpositions array */
        int linepos;           /* Position on the current line, counted in characters */
} Source_t;
  73
/*
 * Struct used for autodiscovery.
 * NOTE(review): not referenced in the part of the file visible here;
 * start/stop presumably delimit a candidate column range -- confirm
 * against the code that uses it.
 */
typedef struct {
        int start;
        int stop;
} AutoDiscovery_t;
  79
  80 /*
  81  * Some silly dude make the length field an unsigned int.  C just does
  82  * not deal very well with that.
  83  */
  84 static inline int
  85 my_garray_len (GArray const *a)
  86 {
  87         return (int)a->len;
  88 }
  89
  90 static char *
  91 my_utf8_strchr (const char *p, gunichar uc)
  92 {
  93         return uc < 0x7f ? strchr (p, uc) : g_utf8_strchr (p, -1, uc);
  94 }
  95
  96 static int
  97 compare_terminator (char const *s, StfParseOptions_t *parseoptions)
  98 {
  99         guchar const *us = (guchar const *)s;
 100         GSList *l;
 101
 102         if (*us > parseoptions->compiled_terminator.max ||
 103             *us < parseoptions->compiled_terminator.min)
 104                 return 0;
 105
 106         for (l = parseoptions->terminator; l; l = l->next) {
 107                 char const *term = l->data;
 108                 char const *d = s;
 109
 110                 while (*term) {
 111                         if (*d != *term)
 112                                 goto next;
 113                         term++;
 114                         d++;
 115                 }
 116                 return d - s;
 117
 118         next:
 119                 ;
 120         }
 121         return 0;
 122 }
 123
 124
 125 /*******************************************************************************************************
 126  * STF PARSE OPTIONS : StfParseOptions related
 127  *******************************************************************************************************/
 128
 129 static void
 130 gnm_g_string_free (GString *s)
 131 {
 132         if (s) g_string_free (s, TRUE);
 133 }
 134
 135
 136 /**
 137  * stf_parse_options_new:
 138  *
 139  * This will return a new StfParseOptions_t struct.
 140  * The struct should, after being used, freed with stf_parse_options_free.
 141  **/
 142 static StfParseOptions_t *
 143 stf_parse_options_new (void)
 144 {
 145         StfParseOptions_t* parseoptions = g_new0 (StfParseOptions_t, 1);
 146
 147         parseoptions->parsetype   = PARSE_TYPE_NOTSET;
 148
 149         parseoptions->terminator  = NULL;
 150         stf_parse_options_add_line_terminator (parseoptions, "\r\n");
 151         stf_parse_options_add_line_terminator (parseoptions, "\n");
 152         stf_parse_options_add_line_terminator (parseoptions, "\r");
 153
 154         parseoptions->trim_spaces = (TRIM_TYPE_RIGHT | TRIM_TYPE_LEFT);
 155         parseoptions->locale = NULL;
 156
 157         parseoptions->splitpositions = NULL;
 158         stf_parse_options_fixed_splitpositions_clear (parseoptions);
 159
 160         parseoptions->stringindicator = '"';
 161         parseoptions->indicator_2x_is_single = TRUE;
 162         parseoptions->sep.duplicates = FALSE;
 163         parseoptions->trim_seps = FALSE;
 164
 165         parseoptions->sep.str = NULL;
 166         parseoptions->sep.chr = NULL;
 167
 168         parseoptions->col_autofit_array = NULL;
 169         parseoptions->col_import_array = NULL;
 170         parseoptions->col_import_array_len = 0;
 171         parseoptions->formats = g_ptr_array_new_with_free_func ((GDestroyNotify)go_format_unref);
 172         parseoptions->formats_decimal = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 173         parseoptions->formats_thousand = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 174         parseoptions->formats_curr = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 175
 176         parseoptions->cols_exceeded = FALSE;
 177         parseoptions->rows_exceeded = FALSE;
 178         parseoptions->ref_count = 1;
 179
 180         return parseoptions;
 181 }
 182
 183 /**
 184  * stf_parse_options_free:
 185  *
 186  * will free @parseoptions, note that this will not free the splitpositions
 187  * member (GArray) of the struct, the caller is responsible for that.
 188  **/
 189 void
 190 stf_parse_options_free (StfParseOptions_t *parseoptions)
 191 {
 192         g_return_if_fail (parseoptions != NULL);
 193
 194         if (parseoptions->ref_count-- > 1)
 195                 return;
 196
 197         g_free (parseoptions->col_import_array);
 198         g_free (parseoptions->col_autofit_array);
 199         g_free (parseoptions->locale);
 200         g_free (parseoptions->sep.chr);
 201
 202         if (parseoptions->sep.str) {
 203                 GSList *l;
 204
 205                 for (l = parseoptions->sep.str; l != NULL; l = l->next)
 206                         g_free ((char *) l->data);
 207                 g_slist_free (parseoptions->sep.str);
 208         }
 209
 210         g_array_free (parseoptions->splitpositions, TRUE);
 211
 212         stf_parse_options_clear_line_terminator (parseoptions);
 213
 214         g_ptr_array_free (parseoptions->formats, TRUE);
 215         g_ptr_array_free (parseoptions->formats_decimal, TRUE);
 216         g_ptr_array_free (parseoptions->formats_thousand, TRUE);
 217         g_ptr_array_free (parseoptions->formats_curr, TRUE);
 218
 219         g_free (parseoptions);
 220 }
 221
 222 static StfParseOptions_t *
 223 stf_parse_options_ref (StfParseOptions_t *parseoptions)
 224 {
 225         parseoptions->ref_count++;
 226         return parseoptions;
 227 }
 228
 229 GType
 230 stf_parse_options_get_type (void)
 231 {
 232         static GType t = 0;
 233
 234         if (t == 0) {
 235                 t = g_boxed_type_register_static ("StfParseOptions_t",
 236                          (GBoxedCopyFunc)stf_parse_options_ref,
 237                          (GBoxedFreeFunc)stf_parse_options_free);
 238         }
 239         return t;
 240 }
 241
/**
 * stf_parse_options_set_type:
 * @parseoptions: the options struct to modify
 * @parsetype: PARSE_TYPE_CSV or PARSE_TYPE_FIXED
 *
 * Selects the parse type.  Any other value for @parsetype is rejected
 * and leaves @parseoptions unchanged.
 **/
void
stf_parse_options_set_type (StfParseOptions_t *parseoptions, StfParseType_t const parsetype)
{
        g_return_if_fail (parseoptions != NULL);
        g_return_if_fail (parsetype == PARSE_TYPE_CSV || parsetype == PARSE_TYPE_FIXED);

        parseoptions->parsetype = parsetype;
}
 250
 251 static gint
 252 long_string_first (gchar const *a, gchar const *b)
 253 {
 254         /* This actually is UTF-8 safe.  */
 255         return strlen (b) - strlen (a);
 256 }
 257
 258 static void
 259 compile_terminators (StfParseOptions_t *parseoptions)
 260 {
 261         GSList *l;
 262         GO_SLIST_SORT (parseoptions->terminator, (GCompareFunc)long_string_first);
 263
 264         parseoptions->compiled_terminator.min = 255;
 265         parseoptions->compiled_terminator.max = 0;
 266         for (l = parseoptions->terminator; l; l = l->next) {
 267                 const guchar *term = l->data;
 268                 parseoptions->compiled_terminator.min =
 269                         MIN (parseoptions->compiled_terminator.min, *term);
 270                 parseoptions->compiled_terminator.max =
 271                         MAX (parseoptions->compiled_terminator.max, *term);
 272         }
 273 }
 274
 275 /**
 276  * stf_parse_options_add_line_terminator:
 277  *
 278  * This will add to the line terminators, in both the Fixed width and CSV delimited importers
 279  * this indicates the end of a row.
 280  *
 281  **/
 282 void
 283 stf_parse_options_add_line_terminator (StfParseOptions_t *parseoptions, char const *terminator)
 284 {
 285         g_return_if_fail (parseoptions != NULL);
 286         g_return_if_fail (terminator != NULL && *terminator != 0);
 287
 288         GO_SLIST_PREPEND (parseoptions->terminator, g_strdup (terminator));
 289         compile_terminators (parseoptions);
 290 }
 291
 292 /**
 293  * stf_parse_options_clear_line_terminator:
 294  *
 295  * This will clear the line terminator, in both the Fixed width and CSV delimited importers
 296  * this indicates the end of a row.
 297  *
 298  **/
 299 void
 300 stf_parse_options_clear_line_terminator (StfParseOptions_t *parseoptions)
 301 {
 302         g_return_if_fail (parseoptions != NULL);
 303
 304         g_slist_free_full (parseoptions->terminator, g_free);
 305         parseoptions->terminator = NULL;
 306         compile_terminators (parseoptions);
 307 }
 308
/**
 * stf_parse_options_set_trim_spaces:
 * @parseoptions: the options struct to modify
 * @trim_spaces: which sides to trim; the parser tests this value against
 *               the TRIM_TYPE_LEFT and TRIM_TYPE_RIGHT bits
 *
 * If enabled, spaces are trimmed in every parsed field on the left and/or
 * right side.
 **/
void
stf_parse_options_set_trim_spaces (StfParseOptions_t *parseoptions, StfTrimType_t const trim_spaces)
{
        g_return_if_fail (parseoptions != NULL);

        parseoptions->trim_spaces = trim_spaces;
}
 322
 323 /**
 324  * stf_parse_options_csv_set_separators:
 325  * @parseoptions: #StfParseOptions_t
 326  * @character:
 327  * @seps: (element-type utf8): the separators to be used
 328  *
 329  * A copy is made of the parameters.
 330  **/
 331 void
 332 stf_parse_options_csv_set_separators (StfParseOptions_t *parseoptions,
 333                                       char const *character,
 334                                       GSList const *seps)
 335 {
 336         g_return_if_fail (parseoptions != NULL);
 337
 338         g_free (parseoptions->sep.chr);
 339         parseoptions->sep.chr = g_strdup (character);
 340
 341         g_slist_free_full (parseoptions->sep.str, g_free);
 342         parseoptions->sep.str = go_slist_map (seps, (GOMapFunc)g_strdup);
 343 }
 344
/**
 * stf_parse_options_csv_set_stringindicator:
 * @parseoptions: the options struct to modify
 * @stringindicator: the character that opens and closes a quoted field;
 *                   the CSV parser treats 0 as "no string indicator"
 *
 * Sets the string indicator used when parsing CSV fields.
 **/
void
stf_parse_options_csv_set_stringindicator (StfParseOptions_t *parseoptions, gunichar const stringindicator)
{
        g_return_if_fail (parseoptions != NULL);

        parseoptions->stringindicator = stringindicator;
}
 352
/**
 * stf_parse_options_csv_set_indicator_2x_is_single:
 * @parseoptions: the options struct to modify
 * @indic_2x: a boolean value indicating whether we want to see two
 *              adjacent string indicators as a single string indicator
 *              that is part of the cell, rather than a terminator.
 *
 * Stores @indic_2x in @parseoptions.
 **/
void
stf_parse_options_csv_set_indicator_2x_is_single (StfParseOptions_t *parseoptions,
                                                  gboolean const indic_2x)
{
        g_return_if_fail (parseoptions != NULL);

        parseoptions->indicator_2x_is_single = indic_2x;
}
 367
/**
 * stf_parse_options_csv_set_duplicates:
 * @parseoptions: the options struct to modify
 * @duplicates: a boolean value indicating whether we want to see two
 *               separators right behind each other as one
 *
 * Stores @duplicates in @parseoptions.
 **/
void
stf_parse_options_csv_set_duplicates (StfParseOptions_t *parseoptions, gboolean const duplicates)
{
        g_return_if_fail (parseoptions != NULL);

        parseoptions->sep.duplicates = duplicates;
}
 381
/**
 * stf_parse_options_csv_set_trim_seps:
 * @parseoptions: the options struct to modify
 * @trim_seps: a boolean value indicating whether we want to ignore
 *               separators at the beginning of lines
 *
 * Stores @trim_seps in @parseoptions.
 **/
void
stf_parse_options_csv_set_trim_seps (StfParseOptions_t *parseoptions, gboolean const trim_seps)
{
        g_return_if_fail (parseoptions != NULL);

        parseoptions->trim_seps = trim_seps;
}
 394
 395 /**
 396  * stf_parse_options_fixed_splitpositions_clear:
 397  *
 398  * This will clear the splitpositions (== points on which a line is split)
 399  **/
 400 void
 401 stf_parse_options_fixed_splitpositions_clear (StfParseOptions_t *parseoptions)
 402 {
 403         int minus_one = -1;
 404         g_return_if_fail (parseoptions != NULL);
 405
 406         if (parseoptions->splitpositions)
 407                 g_array_free (parseoptions->splitpositions, TRUE);
 408         parseoptions->splitpositions = g_array_new (FALSE, FALSE, sizeof (int));
 409
 410         g_array_append_val (parseoptions->splitpositions, minus_one);
 411 }
 412
 413 /**
 414  * stf_parse_options_fixed_splitpositions_add:
 415  *
 416  * @position will be added to the splitpositions.
 417  **/
 418 void
 419 stf_parse_options_fixed_splitpositions_add (StfParseOptions_t *parseoptions, int position)
 420 {
 421         unsigned int ui;
 422
 423         g_return_if_fail (parseoptions != NULL);
 424         g_return_if_fail (position >= 0);
 425
 426         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 427                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 428                 if (position == here)
 429                         return;
 430                 if (position < here)
 431                         break;
 432         }
 433
 434         g_array_insert_val (parseoptions->splitpositions, ui, position);
 435 }
 436
 437 void
 438 stf_parse_options_fixed_splitpositions_remove (StfParseOptions_t *parseoptions, int position)
 439 {
 440         unsigned int ui;
 441
 442         g_return_if_fail (parseoptions != NULL);
 443         g_return_if_fail (position >= 0);
 444
 445         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 446                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 447                 if (position == here)
 448                         g_array_remove_index (parseoptions->splitpositions, ui);
 449                 if (position <= here)
 450                         return;
 451         }
 452 }
 453
 454 int
 455 stf_parse_options_fixed_splitpositions_count (StfParseOptions_t *parseoptions)
 456 {
 457         return parseoptions->splitpositions->len;
 458 }
 459
 460 int
 461 stf_parse_options_fixed_splitpositions_nth (StfParseOptions_t *parseoptions, int n)
 462 {
 463         return g_array_index (parseoptions->splitpositions, int, n);
 464 }
 465
 466
 467 /**
 468  * stf_parse_options_valid:
 469  * @parseoptions: an import options struct
 470  *
 471  * Checks if @parseoptions is correctly filled
 472  *
 473  * returns : TRUE if it is correctly filled, FALSE otherwise.
 474  **/
 475 static gboolean
 476 stf_parse_options_valid (StfParseOptions_t *parseoptions)
 477 {
 478         g_return_val_if_fail (parseoptions != NULL, FALSE);
 479
 480         if (parseoptions->parsetype == PARSE_TYPE_FIXED) {
 481                 if (!parseoptions->splitpositions) {
 482                         g_warning ("STF: No splitpositions in struct");
 483                         return FALSE;
 484                 }
 485         }
 486
 487         return TRUE;
 488 }
 489
 490 /*******************************************************************************************************
 491  * STF PARSE : The actual routines that do the 'trick'
 492  *******************************************************************************************************/
 493
 494 static void
 495 trim_spaces_inplace (char *field, StfParseOptions_t const *parseoptions)
 496 {
 497         if (!field) return;
 498
 499         if (parseoptions->trim_spaces & TRIM_TYPE_LEFT) {
 500                 char *s = field;
 501
 502                 while (g_unichar_isspace (g_utf8_get_char (s)))
 503                         s = g_utf8_next_char (s);
 504
 505                 if (s != field)
 506                         memmove (field, s, 1 + strlen (s));
 507         }
 508
 509         if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 510                 char *s = field + strlen (field);
 511
 512                 while (field != s) {
 513                         s = g_utf8_prev_char (s);
 514                         if (!g_unichar_isspace (g_utf8_get_char (s)))
 515                                 break;
 516                         *s = 0;
 517                 }
 518         }
 519 }
 520
 521 /**
 522  * stf_parse_csv_is_separator:
 523  *
 524  * returns NULL if @character is not a separator, a pointer to the character
 525  * after the separator otherwise.
 526  **/
 527 static char const *
 528 stf_parse_csv_is_separator (char const *character, char const *chr, GSList const *str)
 529 {
 530         g_return_val_if_fail (character != NULL, NULL);
 531
 532         if (*character == 0)
 533                 return NULL;
 534
 535         if (str) {
 536                 GSList const *l;
 537
 538                 for (l = str; l != NULL; l = l->next) {
 539                         char const *s = l->data;
 540                         char const *r;
 541                         glong cnt;
 542                         glong const len = g_utf8_strlen (s, -1);
 543
 544                         /* Don't compare past the end of the buffer! */
 545                         for (r = character, cnt = 0; cnt < len; cnt++, r = g_utf8_next_char (r))
 546                                 if (*r == '\0')
 547                                         break;
 548
 549                         if ((cnt == len) && (memcmp (character, s, len) == 0))
 550                                 return g_utf8_offset_to_pointer (character, len);
 551                 }
 552         }
 553
 554         if (chr && my_utf8_strchr (chr, g_utf8_get_char (character)))
 555                 return g_utf8_next_char(character);
 556
 557         return NULL;
 558 }
 559
 560 /*
 561  * stf_parse_eat_separators:
 562  *
 563  * skip over leading separators
 564  *
 565  */
 566
 567 static void
 568 stf_parse_eat_separators (Source_t *src, StfParseOptions_t *parseoptions)
 569 {
 570         char const *cur, *next;
 571
 572         g_return_if_fail (src != NULL);
 573         g_return_if_fail (parseoptions != NULL);
 574
 575         cur = src->position;
 576
 577         if (*cur == '\0' || compare_terminator (cur, parseoptions))
 578                 return;
 579         while ((next = stf_parse_csv_is_separator (cur, parseoptions->sep.chr, parseoptions->sep.str)))
 580                 cur = next;
 581         src->position = cur;
 582         return;
 583 }
 584
 585
/* Outcome of parsing one CSV cell, as returned by stf_parse_csv_cell.  */
typedef enum {
        STF_CELL_ERROR,         /* An argument check failed */
        STF_CELL_EOF,           /* End of data reached before any field */
        STF_CELL_EOL,           /* Line terminator found (and skipped) before any field */
        STF_CELL_FIELD_NO_SEP,  /* A field was read; no separator was consumed after it */
        STF_CELL_FIELD_SEP      /* A field was read and the separator after it consumed */
} StfParseCellRes;
 593
/*
 * stf_parse_csv_cell:
 *
 * Parses one CSV field starting at src->position.  The field text is
 * appended to @text and src->position is advanced past what was consumed.
 *
 * Returns STF_CELL_EOF when the end of the data is reached before a field
 * starts, STF_CELL_EOL when a line terminator is found there (the position
 * is moved past it), STF_CELL_FIELD_SEP when a field was read and a
 * separator after it was consumed, STF_CELL_FIELD_NO_SEP when a field was
 * read without a following separator, and STF_CELL_ERROR when an argument
 * check fails.
 */
static StfParseCellRes
stf_parse_csv_cell (GString *text, Source_t *src, StfParseOptions_t *parseoptions)
{
        char const *cur;
        gboolean saw_sep = FALSE;

        g_return_val_if_fail (src != NULL, STF_CELL_ERROR);
        g_return_val_if_fail (parseoptions != NULL, STF_CELL_ERROR);

        cur = src->position;
        g_return_val_if_fail (cur != NULL, STF_CELL_ERROR);

        /* Skip whitespace, but stop at line terminators.  */
        while (1) {
                int term_len;

                if (*cur == 0) {
                        src->position = cur;
                        return STF_CELL_EOF;
                }

                term_len = compare_terminator (cur, parseoptions);
                if (term_len) {
                        src->position = cur + term_len;
                        return STF_CELL_EOL;
                }

                /* Leading whitespace is only skipped when left trimming is on.  */
                if ((parseoptions->trim_spaces & TRIM_TYPE_LEFT) == 0)
                        break;

                /* Whitespace that is itself a separator must not be skipped.  */
                if (stf_parse_csv_is_separator (cur, parseoptions->sep.chr,
                                                parseoptions->sep.str))
                        break;

                if (!g_unichar_isspace (g_utf8_get_char (cur)))
                        break;
                cur = g_utf8_next_char (cur);
        }

        if (parseoptions->stringindicator != 0 &&
            g_utf8_get_char (cur) == parseoptions->stringindicator) {
                /*
                 * Quoted field.  Separators and line terminators inside
                 * the quotes are ordinary content.
                 */
                cur = g_utf8_next_char (cur);
                while (*cur) {
                        gunichar uc = g_utf8_get_char (cur);
                        cur = g_utf8_next_char (cur);

                        if (uc == parseoptions->stringindicator) {
                                if (parseoptions->indicator_2x_is_single &&
                                    g_utf8_get_char (cur) == parseoptions->stringindicator)
                                        /*
                                         * Doubled indicator: skip the second
                                         * one; the append below adds a single
                                         * indicator to the field.
                                         */
                                        cur = g_utf8_next_char (cur);
                                else {
                                        /* "field content"dropped-garbage,  */
                                        /*
                                         * Closing indicator: discard anything up
                                         * to the next separator, line terminator
                                         * or end of data.
                                         */
                                        while (*cur && !compare_terminator (cur, parseoptions)) {
                                                char const *post = stf_parse_csv_is_separator
                                                        (cur, parseoptions->sep.chr, parseoptions->sep.str);
                                                if (post) {
                                                        cur = post;
                                                        saw_sep = TRUE;
                                                        break;
                                                }
                                                cur = g_utf8_next_char (cur);
                                        }
                                        /* Leaves the quoted-field loop without appending the indicator.  */
                                        break;
                                }
                        }

                        g_string_append_unichar (text, uc);
                }

                /* We silently allow a missing terminating quote.  */
        } else {
                /* Unquoted field.  */

                /* Copy characters up to a separator, line terminator or end of data.  */
                while (*cur && !compare_terminator (cur, parseoptions)) {

                        char const *post = stf_parse_csv_is_separator
                                (cur, parseoptions->sep.chr, parseoptions->sep.str);
                        if (post) {
                                cur = post;
                                saw_sep = TRUE;
                                break;
                        }

                        g_string_append_unichar (text, g_utf8_get_char (cur));
                        cur = g_utf8_next_char (cur);
                }

                /* Drop trailing whitespace from the collected text.  */
                if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
                        while (text->len) {
                                const char *last = g_utf8_prev_char (text->str + text->len);
                                if (!g_unichar_isspace (g_utf8_get_char (last)))
                                        break;
                                g_string_truncate (text, last - text->str);
                        }
                }
        }

        src->position = cur;

        /* With duplicates enabled, a run of separators counts as one.  */
        if (saw_sep && parseoptions->sep.duplicates)
                stf_parse_eat_separators (src, parseoptions);

        return saw_sep ? STF_CELL_FIELD_SEP : STF_CELL_FIELD_NO_SEP;
}
 698
 699 /**
 700  * stf_parse_csv_line:
 701  *
 702  * This will parse one line from the current @src->position.
 703  * NOTE: The calling routine is responsible for freeing the result.
 704  *
 705  * returns : a GPtrArray of char*'s
 706  **/
 707 static GPtrArray *
 708 stf_parse_csv_line (Source_t *src, StfParseOptions_t *parseoptions)
 709 {
 710         GPtrArray *line;
 711         gboolean cont = FALSE;
 712         GString *text;
 713
 714         g_return_val_if_fail (src != NULL, NULL);
 715         g_return_val_if_fail (parseoptions != NULL, NULL);
 716
 717         line = g_ptr_array_new ();
 718         if (parseoptions->trim_seps)
 719                 stf_parse_eat_separators (src, parseoptions);
 720
 721         text = g_string_sized_new (30);
 722
 723         while (1) {
 724                 char *ctext;
 725                 StfParseCellRes res =
 726                         stf_parse_csv_cell (text, src, parseoptions);
 727                 trim_spaces_inplace (text->str, parseoptions);
 728                 ctext = g_string_chunk_insert_len (src->chunk,
 729                                                    text->str, text->len);
 730                 g_string_truncate (text, 0);
 731
 732                 switch (res) {
 733                 case STF_CELL_FIELD_NO_SEP:
 734                         g_ptr_array_add (line, ctext);
 735                         cont = FALSE;
 736                         break;
 737
 738                 case STF_CELL_FIELD_SEP:
 739                         g_ptr_array_add (line, ctext);
 740                         cont = TRUE;  /* Make sure we see one more field.  */
 741                         break;
 742
 743                 default:
 744                         if (cont)
 745                                 g_ptr_array_add (line, ctext);
 746                         g_string_free (text, TRUE);
 747                         return line;
 748                 }
 749         }
 750 }
 751
 752 /**
 753  * stf_parse_fixed_cell:
 754  *
 755  * returns a pointer to the parsed cell contents.
 756  **/
 757 static char *
 758 stf_parse_fixed_cell (Source_t *src, StfParseOptions_t *parseoptions)
 759 {
 760         char *res;
 761         char const *cur;
 762         int splitval;
 763
 764         g_return_val_if_fail (src != NULL, NULL);
 765         g_return_val_if_fail (parseoptions != NULL, NULL);
 766
 767         cur = src->position;
 768
 769         if (src->splitpos < my_garray_len (parseoptions->splitpositions))
 770                 splitval = (int) g_array_index (parseoptions->splitpositions, int, src->splitpos);
 771         else
 772                 splitval = -1;
 773
 774         while (*cur != 0 && !compare_terminator (cur, parseoptions) && splitval != src->linepos) {
 775                 src->linepos++;
 776                 cur = g_utf8_next_char (cur);
 777         }
 778
 779         res = g_string_chunk_insert_len (src->chunk,
 780                                          src->position,
 781                                          cur - src->position);
 782
 783         src->position = cur;
 784
 785         return res;
 786 }
 787
 788 /**
 789  * stf_parse_fixed_line:
 790  *
 791  * This will parse one line from the current @src->position.
 792  * It will return a GPtrArray with the cell contents as strings.
 793
 794  * NOTE: The calling routine is responsible for freeing result.
 795  **/
 796 static GPtrArray *
 797 stf_parse_fixed_line (Source_t *src, StfParseOptions_t *parseoptions)
 798 {
 799         GPtrArray *line;
 800
 801         g_return_val_if_fail (src != NULL, NULL);
 802         g_return_val_if_fail (parseoptions != NULL, NULL);
 803
 804         src->linepos = 0;
 805         src->splitpos = 0;
 806
 807         line = g_ptr_array_new ();
 808         while (*src->position != '\0' && !compare_terminator (src->position, parseoptions)) {
 809                 char *field = stf_parse_fixed_cell (src, parseoptions);
 810
 811                 trim_spaces_inplace (field, parseoptions);
 812                 g_ptr_array_add (line, field);
 813
 814                 src->splitpos++;
 815         }
 816
 817         while (line->len < parseoptions->splitpositions->len)
 818                 g_ptr_array_add (line, g_strdup (""));
 819
 820         return line;
 821 }
 822
 823 /**
 824  * stf_parse_general_free: (skip)
 825  */
 826 void
 827 stf_parse_general_free (GPtrArray *lines)
 828 {
 829         unsigned lineno;
 830         for (lineno = 0; lineno < lines->len; lineno++) {
 831                 GPtrArray *line = g_ptr_array_index (lines, lineno);
 832                 /* Fields are not freed here.  */
 833                 if (line)
 834                         g_ptr_array_free (line, TRUE);
 835         }
 836         g_ptr_array_free (lines, TRUE);
 837 }
 838
 839
 840 /**
 841  * stf_parse_general: (skip)
 842  *
 843  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 844  * GPtrArray of strings.
 845  *
 846  * The caller must free this entire structure, for example by calling
 847  * stf_parse_general_free.
 848  **/
 849 GPtrArray *
 850 stf_parse_general (StfParseOptions_t *parseoptions,
 851                    GStringChunk *lines_chunk,
 852                    char const *data, char const *data_end)
 853 {
 854         GPtrArray *lines;
 855         Source_t src;
 856         int row;
 857         char const *valid_end = data_end;
 858
 859         g_return_val_if_fail (parseoptions != NULL, NULL);
 860         g_return_val_if_fail (data != NULL, NULL);
 861         g_return_val_if_fail (data_end != NULL, NULL);
 862         g_return_val_if_fail (stf_parse_options_valid (parseoptions), NULL);
 863         g_return_val_if_fail (g_utf8_validate (data, data_end-data, &valid_end), NULL);
 864
 865         src.chunk = lines_chunk;
 866         src.position = data;
 867         row = 0;
 868
 869         if ((data_end-data >= 3) && !strncmp(src.position, "\xEF\xBB\xBF", 3)) {
 870                 /* Skip over byte-order mark */
 871                 src.position += 3;
 872         }
 873
 874         lines = g_ptr_array_new ();
 875         while (*src.position != '\0' && src.position < data_end) {
 876                 GPtrArray *line;
 877
 878                 if (row == GNM_MAX_ROWS) {
 879                         parseoptions->rows_exceeded = TRUE;
 880                         break;
 881                 }
 882
 883                 line = parseoptions->parsetype == PARSE_TYPE_CSV
 884                         ? stf_parse_csv_line (&src, parseoptions)
 885                         : stf_parse_fixed_line (&src, parseoptions);
 886
 887                 g_ptr_array_add (lines, line);
 888                 if (parseoptions->parsetype != PARSE_TYPE_CSV)
 889                         src.position += compare_terminator (src.position, parseoptions);
 890                 row++;
 891         }
 892
 893         return lines;
 894 }
 895
 896 /**
 897  * stf_parse_lines: (skip)
 898  * @parseoptions: #StfParseOptions_t
 899  * @lines_chunk:
 900  * @data:
 901  * @maxlines:
 902  * @with_lineno:
 903  *
 904  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 905  * GPtrArray of strings.
 906  *
 907  * The caller must free this entire structure, for example by calling
 908  * stf_parse_general_free.
 909  **/
 910 GPtrArray *
 911 stf_parse_lines (StfParseOptions_t *parseoptions,
 912                  GStringChunk *lines_chunk,
 913                  char const *data,
 914                  int maxlines, gboolean with_lineno)
 915 {
 916         GPtrArray *lines;
 917         int lineno = 1;
 918
 919         g_return_val_if_fail (data != NULL, NULL);
 920
 921         lines = g_ptr_array_new ();
 922         while (*data) {
 923                 char const *data0 = data;
 924                 GPtrArray *line = g_ptr_array_new ();
 925
 926                 if (with_lineno) {
 927                         char buf[4 * sizeof (int)];
 928                         sprintf (buf, "%d", lineno);
 929                         g_ptr_array_add (line,
 930                                          g_string_chunk_insert (lines_chunk, buf));
 931                 }
 932
 933                 while (1) {
 934                         int termlen = compare_terminator (data, parseoptions);
 935                         if (termlen > 0 || *data == 0) {
 936                                 g_ptr_array_add (line,
 937                                                  g_string_chunk_insert_len (lines_chunk,
 938                                                                             data0,
 939                                                                             data - data0));
 940                                 data += termlen;
 941                                 break;
 942                         } else
 943                                 data = g_utf8_next_char (data);
 944                 }
 945
 946                 g_ptr_array_add (lines, line);
 947
 948                 lineno++;
 949                 if (lineno >= maxlines)
 950                         break;
 951         }
 952         return lines;
 953 }
 954
 955 char const *
 956 stf_parse_find_line (StfParseOptions_t *parseoptions,
 957                      char const *data,
 958                      int line)
 959 {
 960         while (line > 0) {
 961                 int termlen = compare_terminator (data, parseoptions);
 962                 if (termlen > 0) {
 963                         data += termlen;
 964                         line--;
 965                 } else if (*data == 0) {
 966                         return data;
 967                 } else {
 968                         data = g_utf8_next_char (data);
 969                 }
 970         }
 971         return data;
 972 }
 973
 974
/**
 * stf_parse_options_fixed_autodiscover:
 * @parseoptions: a Parse options struct.
 * @data: The actual data.
 * @data_end: data end.
 *
 * Automatically try to discover columns in the text to be parsed.
 * We ignore empty lines (only containing parseoptions->terminator)
 *
 * The existing split positions of @parseoptions are cleared first.  The
 * function then works in three passes over the text:
 *
 * 1. Record every run of spaces that is followed by a non-space character
 *    and count, per position, how many runs start and stop there.  A
 *    position whose start or stop count equals the number of non-empty
 *    lines becomes a split position.
 * 2. For each pair of neighbouring split positions, check whether the
 *    column looks like left- and right-justified text separated by at
 *    least two spaces; if so, insert an extra split position in the gap.
 * 3. Remove split positions whose column contains nothing but spaces.
 *
 * Positions are counted by advancing the data pointer one byte at a time.
 *
 * FIXME: This is so extremely ugly that I am too tired to rewrite it right now.
 *        Think hard of a better more flexible solution...
 **/
void
stf_parse_options_fixed_autodiscover (StfParseOptions_t *parseoptions,
                                      char const *data, char const *data_end)
{
        char const *iterator = data;
        GSList *list = NULL;
        GSList *list_start = NULL;
        /* Incremented per scanned line; its value is not read in this function.  */
        int lines = 0;
        /* Number of lines that contain at least one byte before the terminator.  */
        int effective_lines = 0;
        int max_line_length = 0;
        int *line_begin_hits = NULL;
        int *line_end_hits = NULL;
        int i;

        stf_parse_options_fixed_splitpositions_clear (parseoptions);

        /*
         * First take a look at all possible white space combinations
         */
        while (*iterator && iterator < data_end) {
                gboolean begin_recorded = FALSE;
                AutoDiscovery_t *disc = NULL;
                int position = 0;
                int termlen = 0;

                /*
                 * Scan one line.  Note that only the NUL byte and the
                 * terminator stop this inner loop; data_end is not
                 * checked here.
                 */
                while (*iterator && (termlen = compare_terminator (iterator, parseoptions)) == 0) {
                        if (!begin_recorded && *iterator == ' ') {
                                /* A run of spaces starts at this position.  */
                                disc = g_new0 (AutoDiscovery_t, 1);

                                disc->start = position;

                                begin_recorded = TRUE;
                        } else if (begin_recorded && *iterator != ' ') {
                                /* The run ends at the first non-space.  */
                                disc->stop = position;
                                list = g_slist_prepend (list, disc);

                                begin_recorded = FALSE;
                                disc = NULL;
                        }

                        position++;
                        iterator++;
                }

                if (position > max_line_length)
                        max_line_length = position;

                /*
                 * If there are excess spaces at the end of
                 * the line : ignore them
                 */
                g_free (disc);

                /*
                 * Hop over the terminator
                 */
                iterator += termlen;

                if (position != 0)
                        effective_lines++;

                lines++;
        }

        /* Entries were prepended, so restore the order in which they were found.  */
        list       = g_slist_reverse (list);
        list_start = list;

        /*
         * Kewl stuff:
         * Look at the number of hits at each line position
         * if the number of hits equals the number of lines
         * we can be pretty sure this is the start or end
         * of a column, we filter out empty columns
         * later
         */
        line_begin_hits = g_new0 (int, max_line_length + 1);
        line_end_hits   = g_new0 (int, max_line_length + 1);

        /* Tally the start/stop positions and release the records as we go.  */
        while (list) {
                AutoDiscovery_t *disc = list->data;

                line_begin_hits[disc->start]++;
                line_end_hits[disc->stop]++;

                g_free (disc);

                list = g_slist_next (list);
        }
        g_slist_free (list_start);

        /*
         * NOTE(review): when effective_lines is 0 every zero-initialised
         * counter compares equal to it, so the positions visited here are
         * all added -- confirm this is intended for empty input.
         */
        for (i = 0; i < max_line_length + 1; i++)
                if (line_begin_hits[i] == effective_lines || line_end_hits[i] == effective_lines)
                        stf_parse_options_fixed_splitpositions_add (parseoptions, i);

        /*
         * Do some corrections to the initial columns
         * detected here, we obviously don't need to
         * do this if there are no columns at all.
         */
        if (my_garray_len (parseoptions->splitpositions) > 0) {
                /*
                 * Try to find columns that look like:
                 *
                 * Example     100
                 * Example2      9
                 *
                 * (In other words : Columns with left & right justification with
                 *  a minimum of 2 spaces in the middle)
                 * Split these columns in 2
                 */

                for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
                        int begin = g_array_index (parseoptions->splitpositions, int, i);
                        int end   = g_array_index (parseoptions->splitpositions, int, i + 1);
                        int num_spaces   = -1;
                        int spaces_start = 0;
                        gboolean right_aligned = TRUE;
                        gboolean left_aligned  = TRUE;
                        gboolean has_2_spaces  = TRUE;

                        /* Rescan the whole text for this column.  */
                        iterator = data;
                        lines = 0;
                        while (*iterator && iterator < data_end) {
                                /* TRUE while pos is in [begin, end - 1).  */
                                gboolean trigger = FALSE;
                                /* TRUE while inside a run of spaces in the column.  */
                                gboolean space_trigger = FALSE;
                                int pos = 0;

                                /* Measurements are per line; start afresh.  */
                                num_spaces   = -1;
                                spaces_start = 0;
                                while (*iterator && !compare_terminator (iterator, parseoptions)) {
                                        if (pos == begin) {
                                                /* A space in the first cell: not left aligned.  */
                                                if (*iterator == ' ')
                                                        left_aligned = FALSE;

                                                trigger = TRUE;
                                        } else if (pos == end - 1) {
                                                /* A space in the last cell: not right aligned.  */
                                                if (*iterator == ' ')
                                                        right_aligned = FALSE;

                                                trigger = FALSE;
                                        }

                                        if (trigger || pos == end - 1) {
                                                if (!space_trigger && *iterator == ' ') {
                                                        space_trigger = TRUE;
                                                        spaces_start = pos;
                                                } else if (space_trigger && *iterator != ' ') {
                                                        space_trigger = FALSE;
                                                        num_spaces = pos - spaces_start;
                                                }
                                        }

                                        iterator++;
                                        pos++;
                                }

                                /*
                                 * A line without a completed run of at least
                                 * two spaces in the column (this includes
                                 * lines where num_spaces is still -1)
                                 * disqualifies the column.
                                 */
                                if (num_spaces < 2)
                                        has_2_spaces = FALSE;

                                /*
                                 * NOTE(review): this steps over a single byte,
                                 * whereas the first pass advances by the
                                 * length returned by compare_terminator.  If a
                                 * terminator can be longer than one byte, the
                                 * rest of it would be seen as the start of the
                                 * next line here -- confirm.
                                 */
                                if (*iterator)
                                        iterator++;

                                lines++;
                        }

                        /*
                         * If this column meets all the criteria
                         * split it into two at the last measured
                         * spaces_start + num_spaces
                         */
                        if (has_2_spaces && right_aligned && left_aligned) {
                                /* Equals spaces_start + num_spaces / 2, from the last line scanned.  */
                                int val = (((spaces_start + num_spaces) - spaces_start) / 2) + spaces_start;

                                g_array_insert_val (parseoptions->splitpositions, i + 1, val);

                                /*
                                 * Skip over the inserted column
                                 */
                                i++;
                        }
                }

                /*
                 * Remove empty columns here if needed
                 */
                for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
                        int begin = g_array_index (parseoptions->splitpositions, int, i);
                        int end = g_array_index (parseoptions->splitpositions, int, i + 1);
                        gboolean only_spaces = TRUE;

                        iterator = data;
                        lines = 0;
                        while (*iterator && iterator < data_end) {
                                /* TRUE while pos is in [begin, end).  */
                                gboolean trigger = FALSE;
                                int pos = 0;

                                while (*iterator && !compare_terminator (iterator, parseoptions)) {
                                        if (pos == begin)
                                                trigger = TRUE;
                                        else if (pos == end)
                                                trigger = FALSE;

                                        if (trigger) {
                                                if (*iterator != ' ')
                                                        only_spaces = FALSE;
                                        }

                                        iterator++;
                                        pos++;
                                }

                                /*
                                 * NOTE(review): single-byte step over the
                                 * terminator, as in the pass above -- confirm
                                 * for multi-byte terminators.
                                 */
                                if (*iterator)
                                        iterator++;

                                lines++;
                        }

                        /*
                         * The column only contains spaces
                         * remove it
                         */
                        if (only_spaces) {
                                g_array_remove_index (parseoptions->splitpositions, i);

                                /*
                                 * We HAVE to make sure that the next column (end) also
                                 * gets checked out. If we don't decrease "i" here, we
                                 * will skip over it as the indexes shift down after
                                 * the removal
                                 */
                                i--;
                        }
                }
        }

        g_free (line_begin_hits);
        g_free (line_end_hits);
}
1225
1226 /*******************************************************************************************************
1227  * STF PARSE HL: high-level functions that dump the raw data returned by the low-level parsing
1228  *               functions into something meaningful (== application specific)
1229  *******************************************************************************************************/
1230
1231 /*
1232  * This is more or less as gnm_cell_set_text, except...
1233  * 1. Unknown names are not allowed.
1234  * 2. Only '=' can start an expression.
1235  */
1236
1237 static void
1238 stf_cell_set_text (GnmCell *cell, char const *text)
1239 {
1240         GnmExprTop const *texpr;
1241         GnmValue *val;
1242         GOFormat const *fmt = gnm_style_get_format (gnm_cell_get_style (cell));
1243         const GODateConventions *date_conv =
1244                 workbook_date_conv (cell->base.sheet->workbook);
1245
1246         if (!go_format_is_text (fmt) && *text == '=' && text[1] != 0) {
1247                 GnmExprParseFlags flags =
1248                         GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID;
1249                 const char *expr_start = text + 1;
1250                 GnmParsePos pos;
1251                 val = NULL;
1252                 parse_pos_init_cell (&pos, cell);
1253                 texpr = gnm_expr_parse_str (expr_start, &pos, flags,
1254                                             NULL, NULL);
1255         } else {
1256                 texpr = NULL;
1257                 val = format_match (text, fmt, date_conv);
1258         }
1259
1260         if (!val && !texpr)
1261                 val = value_new_string (text);
1262
1263         if (val)
1264                 gnm_cell_set_value (cell, val);
1265         else {
1266                 gnm_cell_set_expr (cell, texpr);
1267                 gnm_expr_top_unref (texpr);
1268         }
1269 }
1270
1271 static void
1272 stf_read_remember_settings (Workbook *book, StfParseOptions_t *po)
1273 {
1274         if (po->parsetype == PARSE_TYPE_CSV) {
1275                 GnmStfExport *stfe = gnm_stf_get_stfe (G_OBJECT (book));
1276                 char quote[6];
1277                 int length = g_unichar_to_utf8 (po->stringindicator, quote);
1278                 if (length > 5) {
1279                         quote[0] = '"';
1280                         quote[1] = '\0';
1281                 } else quote[length] = '\0';
1282
1283                 g_object_set (G_OBJECT (stfe), "separator", po->sep.chr, "quote", &quote, NULL);
1284
1285                 if ((po->terminator != NULL) &&  (po->terminator->data != NULL))
1286                         g_object_set (G_OBJECT (stfe), "eol", po->terminator->data, NULL);
1287         }
1288 }
1289
1290 gboolean
1291 stf_parse_sheet (StfParseOptions_t *parseoptions,
1292                  char const *data, char const *data_end,
1293                  Sheet *sheet, int start_col, int start_row)
1294 {
1295         int row;
1296         unsigned int lrow;
1297         GStringChunk *lines_chunk;
1298         GPtrArray *lines;
1299         gboolean result = TRUE;
1300         int col;
1301         unsigned int lcol;
1302         size_t nformats;
1303
1304         SETUP_LOCALE_SWITCH;
1305
1306         g_return_val_if_fail (parseoptions != NULL, FALSE);
1307         g_return_val_if_fail (data != NULL, FALSE);
1308         g_return_val_if_fail (IS_SHEET (sheet), FALSE);
1309
1310         if (!data_end)
1311                 data_end = data + strlen (data);
1312
1313         lines_chunk = g_string_chunk_new (100 * 1024);
1314         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1315         if (lines == NULL)
1316                 result = FALSE;
1317
1318         col = start_col;
1319         nformats = parseoptions->formats->len;
1320         for (lcol = 0; lcol < nformats; lcol++) {
1321                 GOFormat const *fmt = g_ptr_array_index (parseoptions->formats, lcol);
1322                 GnmStyle *mstyle;
1323                 gboolean want_col =
1324                         (parseoptions->col_import_array == NULL ||
1325                          parseoptions->col_import_array_len <= lcol ||
1326                          parseoptions->col_import_array[lcol]);
1327                 if (!want_col || col >= gnm_sheet_get_max_cols (sheet))
1328                         continue;
1329
1330                 if (fmt && !go_format_is_general (fmt)) {
1331                         GnmRange r;
1332                         int end_row = MIN (start_row + (int)lines->len - 1,
1333                                            gnm_sheet_get_last_row (sheet));
1334
1335                         range_init (&r, col, start_row, col, end_row);
1336                         mstyle = gnm_style_new ();
1337                         gnm_style_set_format (mstyle, fmt);
1338                         sheet_apply_style (sheet, &r, mstyle);
1339                 }
1340                 col++;
1341         }
1342
1343         START_LOCALE_SWITCH;
1344         for (row = start_row, lrow = 0;
1345              result && lrow < lines->len;
1346              row++, lrow++) {
1347                 GPtrArray *line;
1348
1349                 if (row >= gnm_sheet_get_max_rows (sheet)) {
1350                         if (!parseoptions->rows_exceeded) {
1351                                 /* FIXME: What locale?  */
1352                                 g_warning (_("There are more rows of data than "
1353                                              "there is room for in the sheet.  Extra "
1354                                              "rows will be ignored."));
1355                                 parseoptions->rows_exceeded = TRUE;
1356                         }
1357                         break;
1358                 }
1359
1360                 col = start_col;
1361                 line = g_ptr_array_index (lines, lrow);
1362
1363                 for (lcol = 0; lcol < line->len; lcol++) {
1364                         GOFormat const *fmt = lcol < nformats
1365                                 ? g_ptr_array_index (parseoptions->formats, lcol)
1366                                 : go_format_general ();
1367                         char const *text = g_ptr_array_index (line, lcol);
1368                         gboolean want_col =
1369                                 (parseoptions->col_import_array == NULL ||
1370                                  parseoptions->col_import_array_len <= lcol ||
1371                                  parseoptions->col_import_array[lcol]);
1372                         if (!want_col)
1373                                 continue;
1374
1375                         if (col >= gnm_sheet_get_max_cols (sheet)) {
1376                                 if (!parseoptions->cols_exceeded) {
1377                                         /* FIXME: What locale?  */
1378                                         g_warning (_("There are more columns of data than "
1379                                                      "there is room for in the sheet.  Extra "
1380                                                      "columns will be ignored."));
1381                                         parseoptions->cols_exceeded = TRUE;
1382                                 }
1383                                 break;
1384                         }
1385                         if (text && *text) {
1386                                 GnmCell *cell = sheet_cell_fetch (sheet, col, row);
1387                                 if (!go_format_is_text (fmt) &&
1388                                     lcol < parseoptions->formats_decimal->len &&
1389                                     g_ptr_array_index (parseoptions->formats_decimal, lcol)) {
1390                                         GOFormatFamily fam;
1391                                         GnmValue *v = format_match_decimal_number_with_locale
1392                                                 (text, &fam,
1393                                                  g_ptr_array_index (parseoptions->formats_curr, lcol),
1394                                                  g_ptr_array_index (parseoptions->formats_thousand, lcol),
1395                                                  g_ptr_array_index (parseoptions->formats_decimal, lcol));
1396                                         if (!v)
1397                                                 v = value_new_string (text);
1398                                         sheet_cell_set_value (cell, v);
1399                                 } else {
1400
1401                                         stf_cell_set_text (cell, text);
1402                                 }
1403                         }
1404                         col++;
1405                 }
1406
1407                 g_ptr_array_index (lines, lrow) = NULL;
1408                 g_ptr_array_free (line, TRUE);
1409         }
1410         END_LOCALE_SWITCH;
1411
1412         for (lcol = 0, col = start_col;
1413              lcol < parseoptions->col_import_array_len  && col < gnm_sheet_get_max_cols (sheet);
1414              lcol++) {
1415                 if (parseoptions->col_import_array == NULL ||
1416                     parseoptions->col_import_array_len <= lcol ||
1417                     parseoptions->col_import_array[lcol]) {
1418                         if (parseoptions->col_autofit_array == NULL ||
1419                             parseoptions->col_autofit_array[lcol]) {
1420                                 ColRowIndexList *list = colrow_get_index_list (col, col, NULL);
1421                                 ColRowStateGroup  *state = colrow_set_sizes (sheet, TRUE, list, -1, 0, -1);
1422                                 colrow_index_list_destroy (list);
1423                                 g_slist_free (state);
1424                         }
1425                         col++;
1426                 }
1427         }
1428
1429         g_string_chunk_free (lines_chunk);
1430         if (lines)
1431                 stf_parse_general_free (lines);
1432         if (result)
1433                 stf_read_remember_settings (sheet->workbook, parseoptions);
1434         return result;
1435 }
1436
1437 GnmCellRegion *
1438 stf_parse_region (StfParseOptions_t *parseoptions, char const *data, char const *data_end,
1439                   Workbook const *wb)
1440 {
1441         static GODateConventions const default_conv = {FALSE};
1442         GODateConventions const *date_conv = wb ? workbook_date_conv (wb) : &default_conv;
1443
1444         GnmCellRegion *cr;
1445         unsigned int row, colhigh = 0;
1446         GStringChunk *lines_chunk;
1447         GPtrArray *lines;
1448         size_t nformats;
1449
1450         SETUP_LOCALE_SWITCH;
1451
1452         g_return_val_if_fail (parseoptions != NULL, NULL);
1453         g_return_val_if_fail (data != NULL, NULL);
1454
1455         START_LOCALE_SWITCH;
1456
1457         cr = gnm_cell_region_new (NULL);
1458
1459         if (!data_end)
1460                 data_end = data + strlen (data);
1461         lines_chunk = g_string_chunk_new (100 * 1024);
1462         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1463         nformats = parseoptions->formats->len;
1464         for (row = 0; row < lines->len; row++) {
1465                 GPtrArray *line = g_ptr_array_index (lines, row);
1466                 unsigned int col, targetcol = 0;
1467                 for (col = 0; col < line->len; col++) {
1468                         if (parseoptions->col_import_array == NULL ||
1469                             parseoptions->col_import_array_len <= col ||
1470                             parseoptions->col_import_array[col]) {
1471                                 const char *text = g_ptr_array_index (line, col);
1472                                 if (text) {
1473                                         GOFormat *fmt = NULL;
1474                                         GnmValue *v;
1475                                         GnmCellCopy *cc;
1476
1477                                         if (col < nformats)
1478                                                 fmt = g_ptr_array_index (parseoptions->formats, col);
1479                                         v = format_match (text, fmt, date_conv);
1480                                         if (!v)
1481                                                 v = value_new_string (text);
1482
1483                                         cc = gnm_cell_copy_new (cr, targetcol, row);
1484                                         cc->val  = v;
1485                                         cc->texpr = NULL;
1486                                         targetcol++;
1487                                         if (targetcol > colhigh)
1488                                                 colhigh = targetcol;
1489                                 }
1490                         }
1491                 }
1492         }
1493         stf_parse_general_free (lines);
1494         g_string_chunk_free (lines_chunk);
1495
1496         END_LOCALE_SWITCH;
1497
1498         cr->cols    = (colhigh > 0) ? colhigh : 1;
1499         cr->rows    = row;
1500
1501         return cr;
1502 }
1503
/*
 * qsort comparison function for ints, ascending order.
 *
 * Returns a negative, zero or positive value when *a is less than, equal
 * to or greater than *b.  The operands are compared rather than
 * subtracted because a difference can overflow for values of opposite
 * sign.
 */
static int
int_sort (void const *a, void const *b)
{
        int const x = *(int const *)a;
        int const y = *(int const *)b;

        return (x > y) - (x < y);
}
1509
1510 static int
1511 count_character (GPtrArray *lines, gunichar c, double quantile)
1512 {
1513         int *counts, res;
1514         unsigned int lno, cno;
1515
1516         if (lines->len == 0)
1517                 return 0;
1518
1519         counts = g_new (int, lines->len);
1520         for (lno = cno = 0; lno < lines->len; lno++) {
1521                 int count = 0;
1522                 GPtrArray *boxline = g_ptr_array_index (lines, lno);
1523                 char const *line = g_ptr_array_index (boxline, 0);
1524
1525                 /* Ignore empty lines.  */
1526                 if (*line == 0)
1527                         continue;
1528
1529                 while (*line) {
1530                         if (g_utf8_get_char (line) == c)
1531                                 count++;
1532                         line = g_utf8_next_char (line);
1533                 }
1534
1535                 counts[cno++] = count;
1536         }
1537
1538         if (cno == 0)
1539                 res = 0;
1540         else {
1541                 unsigned int qi = (unsigned int)ceil (quantile * cno);
1542                 qsort (counts, cno, sizeof (counts[0]), int_sort);
1543                 if (qi == cno)
1544                         qi--;
1545                 res = counts[qi];
1546         }
1547
1548         g_free (counts);
1549
1550         return res;
1551 }
1552
1553 static void
1554 dump_guessed_options (const StfParseOptions_t *res)
1555 {
1556         GSList *l;
1557         char ubuffer[6 + 1];
1558         unsigned ui;
1559
1560         g_printerr ("Guessed format:\n");
1561         switch (res->parsetype) {
1562         case PARSE_TYPE_CSV:
1563                 g_printerr ("  type = sep\n");
1564                 g_printerr ("  separator = %s\n",
1565                             res->sep.chr ? res->sep.chr : "(none)");
1566                 g_printerr ("    see two as one = %s\n",
1567                             res->sep.duplicates ? "yes" : "no");
1568                 break;
1569         case PARSE_TYPE_FIXED:
1570                 g_printerr ("  type = sep\n");
1571                 break;
1572         default:
1573                 ;
1574         }
1575         g_printerr ("  trim space = %d\n", res->trim_spaces);
1576
1577         ubuffer[g_unichar_to_utf8 (res->stringindicator, ubuffer)] = 0;
1578         g_printerr ("  string indicator = %s\n", ubuffer);
1579         g_printerr ("    see two as one = %s\n",
1580                     res->indicator_2x_is_single ? "yes" : "no");
1581
1582         g_printerr ("  line terminators =");
1583         for (l = res->terminator; l; l = l->next) {
1584                 const char *t = l->data;
1585                 if (strcmp (t, "\n") == 0)
1586                         g_printerr (" unix");
1587                 else if (strcmp (t, "\r") == 0)
1588                         g_printerr (" mac");
1589                 else if (strcmp (t, "\r\n") == 0)
1590                         g_printerr (" dos");
1591                 else
1592                         g_printerr (" other");
1593         }
1594         g_printerr ("\n");
1595
1596         for (ui = 0; ui < res->formats->len; ui++) {
1597                 GOFormat const *fmt = g_ptr_array_index (res->formats, ui);
1598                 const GString *decimal = ui < res->formats_decimal->len
1599                         ? g_ptr_array_index (res->formats_decimal, ui)
1600                         : NULL;
1601                 const GString *thousand = ui < res->formats_thousand->len
1602                         ? g_ptr_array_index (res->formats_thousand, ui)
1603                         : NULL;
1604
1605                 g_printerr ("  fmt.%d = %s\n", ui, go_format_as_XL (fmt));
1606                 if (decimal)
1607                         g_printerr ("  fmt.%d.dec = %s\n", ui, decimal->str);
1608                 if (thousand)
1609                         g_printerr ("  fmt.%d.thou = %s\n", ui, thousand->str);
1610         }
1611 }
1612
1613 /**
1614  * stf_parse_options_guess:
1615  * @data: the input data.
1616  *
1617  * Returns: (transfer full): the guessed options.
1618  **/
1619 StfParseOptions_t *
1620 stf_parse_options_guess (char const *data)
1621 {
1622         StfParseOptions_t *res;
1623         GStringChunk *lines_chunk;
1624         GPtrArray *lines;
1625         int tabcount;
1626         int sepcount;
1627         gunichar sepchar = go_locale_get_arg_sep ();
1628
1629         g_return_val_if_fail (data != NULL, NULL);
1630
1631         res = stf_parse_options_new ();
1632         lines_chunk = g_string_chunk_new (100 * 1024);
1633         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1634
1635         tabcount = count_character (lines, '\t', 0.2);
1636         sepcount = count_character (lines, sepchar, 0.2);
1637
1638         /* At least one tab per line and enough to separate every
1639            would-be sepchars.  */
1640         if (tabcount >= 1 && tabcount >= sepcount - 1)
1641                 stf_parse_options_csv_set_separators (res, "\t", NULL);
1642         else {
1643                 gunichar c;
1644
1645                 /*
1646                  * Try a few more or less likely characters and pick the first
1647                  * one that occurs on at least half the lines.
1648                  *
1649                  * The order is mostly random, although ' ' and '!' which
1650                  * could very easily occur in text are put last.
1651                  */
1652                 if (count_character (lines, (c = sepchar), 0.5) > 0 ||
1653                     count_character (lines, (c = go_locale_get_col_sep ()), 0.5) > 0 ||
1654                     count_character (lines, (c = ':'), 0.5) > 0 ||
1655                     count_character (lines, (c = ','), 0.5) > 0 ||
1656                     count_character (lines, (c = ';'), 0.5) > 0 ||
1657                     count_character (lines, (c = '|'), 0.5) > 0 ||
1658                     count_character (lines, (c = '!'), 0.5) > 0 ||
1659                     count_character (lines, (c = ' '), 0.5) > 0) {
1660                         char sep[7];
1661                         sep[g_unichar_to_utf8 (c, sep)] = 0;
1662                         if (c == ' ')
1663                                 strcat (sep, "\t");
1664                         stf_parse_options_csv_set_separators (res, sep, NULL);
1665                 }
1666         }
1667
1668         // For now, always separated:
1669         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1670
1671         switch (res->parsetype) {
1672         case PARSE_TYPE_CSV: {
1673                 gboolean dups =
1674                         res->sep.chr &&
1675                         strchr (res->sep.chr, ' ') != NULL;
1676                 gboolean trim =
1677                         res->sep.chr &&
1678                         strchr (res->sep.chr, ' ') != NULL;
1679
1680                 stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1681                 stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1682                 stf_parse_options_csv_set_duplicates (res, dups);
1683                 stf_parse_options_csv_set_trim_seps (res, trim);
1684
1685                 stf_parse_options_csv_set_stringindicator (res, '"');
1686                 break;
1687         }
1688
1689         case PARSE_TYPE_FIXED:
1690                 break;
1691
1692         default:
1693                 g_assert_not_reached ();
1694         }
1695
1696         stf_parse_general_free (lines);
1697         g_string_chunk_free (lines_chunk);
1698
1699         stf_parse_options_guess_formats (res, data);
1700
1701         if (gnm_debug_flag ("stf"))
1702                 dump_guessed_options (res);
1703
1704         return res;
1705 }
1706
1707 /**
1708  * stf_parse_options_guess_csv:
1709  * @data: the CSV input data.
1710  *
1711  * Returns: (transfer full): the guessed options.
1712  **/
1713 StfParseOptions_t *
1714 stf_parse_options_guess_csv (char const *data)
1715 {
1716         StfParseOptions_t *res;
1717         GStringChunk *lines_chunk;
1718         GPtrArray *lines;
1719         char *sep = NULL;
1720         char const *quoteline = NULL;
1721         int pass;
1722         gunichar stringind = '"';
1723
1724         g_return_val_if_fail (data != NULL, NULL);
1725
1726         res = stf_parse_options_new ();
1727         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1728         stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1729         stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1730         stf_parse_options_csv_set_duplicates (res, FALSE);
1731         stf_parse_options_csv_set_trim_seps (res, FALSE);
1732         stf_parse_options_csv_set_stringindicator (res, stringind);
1733
1734         lines_chunk = g_string_chunk_new (100 * 1024);
1735         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1736
1737         /*
1738          * Find a line containing a quote; skip first line unless it is
1739          * the only one.  Prefer a line with the quote first.
1740          */
1741         for (pass = 1; !quoteline && pass <= 2; pass++) {
1742                 size_t lno;
1743                 for (lno = MIN (1, lines->len - 1);
1744                      !quoteline && lno < lines->len;
1745                      lno++) {
1746                         GPtrArray *boxline = g_ptr_array_index (lines, lno);
1747                         const char *line = g_ptr_array_index (boxline, 0);
1748                         switch (pass) {
1749                         case 1:
1750                                 if (g_utf8_get_char (line) == stringind)
1751                                         quoteline = line;
1752                                 break;
1753                         case 2:
1754                                 if (my_utf8_strchr (line, stringind))
1755                                         quoteline = line;
1756                                 break;
1757                         }
1758                 }
1759         }
1760
1761         if (quoteline) {
1762                 const char *p0 = my_utf8_strchr (quoteline, stringind);
1763                 const char *p = p0;
1764
1765                 do {
1766                         p = g_utf8_next_char (p);
1767                 } while (*p && g_utf8_get_char (p) != stringind);
1768                 if (*p) p = g_utf8_next_char (p);
1769                 while (*p && g_unichar_isspace (g_utf8_get_char (p)))
1770                         p = g_utf8_next_char (p);
1771                 if (*p) {
1772                         /* Use the character after the quote.  */
1773                         sep = g_strndup (p, g_utf8_next_char (p) - p);
1774                 } else {
1775                         /* Try to use character before the quote.  */
1776                         while (p0 > quoteline && !sep) {
1777                                 p = p0;
1778                                 p0 = g_utf8_prev_char (p0);
1779                                 if (!g_unichar_isspace (g_utf8_get_char (p0)))
1780                                         sep = g_strndup (p0, p - p0);
1781                         }
1782                 }
1783         }
1784
1785         if (!sep)
1786                 sep = g_strdup (",");
1787         stf_parse_options_csv_set_separators (res, sep, NULL);
1788         g_free (sep);
1789
1790         stf_parse_general_free (lines);
1791         g_string_chunk_free (lines_chunk);
1792
1793         stf_parse_options_guess_formats (res, data);
1794
1795         if (gnm_debug_flag ("stf"))
1796                 dump_guessed_options (res);
1797
1798         return res;
1799 }
1800
/*
 * Bit flags for the interpretations of a column that are still possible
 * while guessing formats.  A column starts with STF_GUESS_ALL and flags are
 * cleared as values rule them out.
 */
typedef enum {
        /* Date field orderings.  */
        STF_GUESS_DATE_DMY = 1 << 0,
        STF_GUESS_DATE_MDY = 1 << 1,
        STF_GUESS_DATE_YMD = 1 << 2,

        /* Numbers, by which character is the decimal separator.  */
        STF_GUESS_NUMBER_DEC_POINT = 1 << 4,
        STF_GUESS_NUMBER_DEC_COMMA = 1 << 5,
        STF_GUESS_NUMBER_DEC_EITHER =
                STF_GUESS_NUMBER_DEC_POINT | STF_GUESS_NUMBER_DEC_COMMA,

        /* Every flag above.  */
        STF_GUESS_ALL =
                STF_GUESS_DATE_DMY | STF_GUESS_DATE_MDY | STF_GUESS_DATE_YMD |
                STF_GUESS_NUMBER_DEC_EITHER
} StfGuessFormats;
1812
1813 static void
1814 do_check_date (const char *data, StfGuessFormats flag,
1815                gboolean mbd, gboolean ybm,
1816                unsigned *possible,
1817                GODateConventions const *date_conv)
1818 {
1819         GnmValue *v;
1820         gboolean this_mbd, this_ybm;
1821         int imbd;
1822
1823         if (!(*possible & flag))
1824                 return;
1825
1826         v = format_match_datetime (data, date_conv, mbd, TRUE, FALSE);
1827         if (!v || !VALUE_FMT (v))
1828                 goto fail;
1829
1830         imbd = go_format_month_before_day (VALUE_FMT (v));
1831         this_mbd = (imbd >= 1);
1832         this_ybm = (imbd == 2);
1833         if (mbd != this_mbd || ybm != this_ybm)
1834                 goto fail;
1835
1836         goto done;
1837
1838 fail:
1839         *possible &= ~flag;
1840 done:
1841         value_release (v);
1842 }
1843
1844
1845 static void
1846 do_check_number (const char *data, StfGuessFormats flag,
1847                  const GString *dec, const GString *thousand, const GString *curr,
1848                  unsigned *possible, int *decimals)
1849 {
1850         GnmValue *v;
1851         GOFormatFamily family;
1852         const char *pthou;
1853
1854         if (!(*possible & flag))
1855                 return;
1856
1857         v = format_match_decimal_number_with_locale (data, &family, curr, thousand, dec);
1858         if (!v)
1859                 goto fail;
1860
1861         if (*decimals != -2) {
1862                 const char *pdec = strstr (data, dec->str);
1863                 int this_decimals = 0;
1864                 if (pdec) {
1865                         pdec += dec->len;
1866                         while (g_ascii_isdigit (*pdec)) {
1867                                 pdec++;
1868                                 this_decimals++;
1869                         }
1870                 }
1871                 if (*decimals == -1)
1872                         *decimals = this_decimals;
1873                 else if (*decimals != this_decimals)
1874                         *decimals = -2;
1875         }
1876
1877         pthou = strstr (data, thousand->str);
1878         if (pthou) {
1879                 const char *p;
1880                 int digits = 0, nonzero_digits = 0;
1881                 for (p = data; p < pthou; p = g_utf8_next_char (p)) {
1882                         if (g_unichar_isdigit (g_utf8_get_char (p))) {
1883                                 digits++;
1884                                 if (*p != '0')
1885                                         nonzero_digits++;
1886                         }
1887                 }
1888                 // "-.222" implies that "." is not a thousands separator.
1889                 // "0.222" implies that "." is not a thousands separator.
1890                 // "12345,555" implies that "," is not a thousands separator.
1891                 if (nonzero_digits == 0 || digits > 3)
1892                         goto fail;
1893         }
1894
1895         goto done;
1896
1897 fail:
1898         *possible &= ~flag;
1899 done:
1900         value_release (v);
1901 }
1902
1903
1904 /**
1905  * stf_parse_options_guess_formats:
1906  * @data: the CSV input data.
1907  *
1908  * This function attempts to recognize data formats on a column-by-column
1909  * basis under the assumption that the data in a text file will generally
1910  * use the same data formats.
1911  *
1912  * This is useful because not all values give sufficient information by
1913  * themselves to tell what format the data is in.  For example, "1/2/2000"
1914  * is likely to be a date in year 2000, but it is not clear if it is in
1915  * January or February.  If another value in the same column is "31/1/1999"
1916  * then it is likely that the former date was in February.
1917  *
1918  * Likewise, a value of "123,456" could mean either 1.23456e5 or 1.23456e2.
1919  * A later value of "111,200.22" would clear up the confusion.
1920  *
1921  **/
1922 void
1923 stf_parse_options_guess_formats (StfParseOptions_t *po, char const *data)
1924 {
1925         GStringChunk *lines_chunk;
1926         GPtrArray *lines;
1927         unsigned lno, col, colcount, sline;
1928         GODateConventions const *date_conv = go_date_conv_from_str ("Lotus:1900");
1929         GString *s_comma = g_string_new (",");
1930         GString *s_dot = g_string_new (".");
1931         GString *s_dollar = g_string_new ("$");
1932         gboolean debug = gnm_debug_flag ("stf");
1933
1934         g_ptr_array_set_size (po->formats, 0);
1935         g_ptr_array_set_size (po->formats_decimal, 0);
1936         g_ptr_array_set_size (po->formats_thousand, 0);
1937         g_ptr_array_set_size (po->formats_curr, 0);
1938
1939         lines_chunk = g_string_chunk_new (100 * 1024);
1940         lines = stf_parse_general (po, lines_chunk, data, data + strlen (data));
1941
1942         colcount = 0;
1943         for (lno = 0; lno < lines->len; lno++) {
1944                 GPtrArray *line = g_ptr_array_index (lines, lno);
1945                 colcount = MAX (colcount, line->len);
1946         }
1947
1948         // Ignore first line unless it is the only one
1949         sline = MIN ((int)lines->len - 1, 1);
1950
1951         g_ptr_array_set_size (po->formats, colcount);
1952         g_ptr_array_set_size (po->formats_decimal, colcount);
1953         g_ptr_array_set_size (po->formats_thousand, colcount);
1954         g_ptr_array_set_size (po->formats_curr, colcount);
1955         for (col = 0; col < colcount; col++) {
1956                 unsigned possible = STF_GUESS_ALL;
1957                 GOFormat *fmt = NULL;
1958                 gboolean seen_dot = FALSE;
1959                 gboolean seen_comma = FALSE;
1960                 int decimals_if_point = -1; // -1: unset; -2: inconsistent; >=0: count
1961                 int decimals_if_comma = -1; // -1: unset; -2: inconsistent; >=0: count
1962
1963                 for (lno = sline; possible && lno < lines->len; lno++) {
1964                         GPtrArray *line = g_ptr_array_index (lines, lno);
1965                         const char *data = col < line->len ? g_ptr_array_index (line, col) : "";
1966                         unsigned prev_possible = possible;
1967
1968                         if (*data == 0 || data[0] == '\'')
1969                                 continue;
1970
1971                         do_check_date (data, STF_GUESS_DATE_DMY, FALSE, FALSE, &possible, date_conv);
1972                         do_check_date (data, STF_GUESS_DATE_MDY, TRUE, FALSE, &possible, date_conv);
1973                         do_check_date (data, STF_GUESS_DATE_YMD, TRUE, TRUE, &possible, date_conv);
1974
1975                         if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER) {
1976                                 const char *pdot = strstr (data, s_dot->str);
1977                                 const char *pcomma = strstr (data, s_comma->str);
1978                                 if (pdot && pcomma) {
1979                                         // Both -- last one is the decimal separator
1980                                         if (pdot > pcomma)
1981                                                 possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1982                                         else
1983                                                 possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1984                                 } else if (pdot && strstr (pdot + s_dot->len, s_dot->str)) {
1985                                         // Two dots so they are thousands separators
1986                                         possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1987                                 } else if (pcomma && strstr (pcomma + s_comma->len, s_comma->str)) {
1988                                         // Two commas so they are thousands separators
1989                                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1990                                 }
1991
1992                                 seen_dot = seen_dot || (pdot != 0);
1993                                 seen_comma = seen_comma || (pcomma != 0);
1994                         }
1995                         do_check_number (data, STF_GUESS_NUMBER_DEC_POINT,
1996                                          s_dot, s_comma, s_dollar,
1997                                          &possible, &decimals_if_point);
1998                         do_check_number (data, STF_GUESS_NUMBER_DEC_COMMA,
1999                                          s_comma, s_dot, s_dollar,
2000                                          &possible, &decimals_if_comma);
2001
2002                         if (possible != prev_possible && debug)
2003                                 g_printerr ("col=%d; after [%s] possible=0x%x\n", col, data, possible);
2004                 }
2005
2006                 if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER &&
2007                     !seen_dot && !seen_comma) {
2008                         // It doesn't matter what the separators are
2009                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
2010                 }
2011
2012                 switch (possible) {
2013                 case STF_GUESS_DATE_DMY:
2014                         fmt = go_format_new_from_XL ("d-mmm-yyyy");
2015                         break;
2016                 case STF_GUESS_DATE_MDY:
2017                         fmt = go_format_new_from_XL ("m/d/yyyy");
2018                         break;
2019                 case STF_GUESS_DATE_YMD:
2020                         fmt = go_format_new_from_XL ("yyyy-mm-dd");
2021                         break;
2022                 case STF_GUESS_NUMBER_DEC_POINT:
2023                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (".");
2024                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (",");
2025                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2026                         if (decimals_if_point > 0) {
2027                                 // Don't set format if decimals is zero
2028                                 GString *fmt_str = g_string_new (NULL);
2029                                 go_format_generate_number_str (fmt_str, 1, decimals_if_point, seen_comma, FALSE, FALSE, "", "");
2030                                 fmt = go_format_new_from_XL (fmt_str->str);
2031                                 g_string_free (fmt_str, TRUE);
2032                         }
2033                         break;
2034                 case STF_GUESS_NUMBER_DEC_COMMA:
2035                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (",");
2036                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (".");
2037                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2038                         if (decimals_if_comma > 0) {
2039                                 // Don't set format if decimals is zero
2040                                 GString *fmt_str = g_string_new (NULL);
2041                                 go_format_generate_number_str (fmt_str, 1, decimals_if_comma, seen_dot, FALSE, FALSE, "", "");
2042                                 fmt = go_format_new_from_XL (fmt_str->str);
2043                                 g_string_free (fmt_str, TRUE);
2044                         }
2045                         break;
2046                 default:
2047                         break;
2048                 }
2049
2050                 if (!fmt)
2051                         fmt = go_format_ref (go_format_general ());
2052                 g_ptr_array_index (po->formats, col) = fmt;
2053         }
2054
2055         stf_parse_general_free (lines);
2056         g_string_chunk_free (lines_chunk);
2057
2058         g_string_free (s_dot, TRUE);
2059         g_string_free (s_comma, TRUE);
2060         g_string_free (s_dollar, TRUE);
2061 }