src/stf-parse.c

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * stf-parse.c : Structured Text Format parser. (STF)
   4  *               A general purpose engine for parsing data
   5  *               in CSV and Fixed width format.
   6  *
   7  *
   8  * Copyright (C) Almer. S. Tigelaar.
   9  * EMail: almer1@dds.nl or almer-t@bigfoot.com
  10  *
  11  * Copyright (C) 2003 Andreas J. Guelzow <aguelzow@taliesin.ca>
  12  * Copyright (C) 2003,2008-2009 Morten Welinder <terra@gnome.org>
  13  *
  14  * This program is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU General Public License as published by
  16  * the Free Software Foundation; either version 2 of the License, or
  17  * (at your option) any later version.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with this program; if not, write to the Free Software
  26  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  27  */
  28
  29 #include <gnumeric-config.h>
  30 #include <glib/gi18n-lib.h>
  31 #include "gnumeric.h"
  32 #include "stf-parse.h"
  33 #include "stf-export.h"
  34
  35 #include "workbook.h"
  36 #include "cell.h"
  37 #include "sheet.h"
  38 #include "expr.h"
  39 #include "clipboard.h"
  40 #include "sheet-style.h"
  41 #include "value.h"
  42 #include "mstyle.h"
  43 #include "number-match.h"
  44 #include "gutils.h"
  45 #include "parse-util.h"
  46 #include "number-match.h"
  47 #include "gnm-format.h"
  48 #include "ranges.h"
  49 #include <goffice/goffice.h>
  50
  51 #include <stdlib.h>
  52 #include <locale.h>
  53 #include <string.h>
  54
/*
 * Helpers for temporarily switching the locale.
 *
 * SETUP_LOCALE_SWITCH declares the local `oldlocale' that the other two
 * macros read and write; NULL means there is nothing to restore.
 */
#define SETUP_LOCALE_SWITCH char *oldlocale = NULL

/*
 * If parseoptions->locale is set, remember a copy of the current LC_ALL
 * locale in `oldlocale' and switch to parseoptions->locale.  Expects a
 * variable named `parseoptions' to be in scope.
 */
#define START_LOCALE_SWITCH if (parseoptions->locale) {\
oldlocale = g_strdup(go_setlocale (LC_ALL, NULL)); \
go_setlocale(LC_ALL, parseoptions->locale);}

/*
 * If START_LOCALE_SWITCH saved a locale, switch back to it and free the
 * saved copy.
 */
#define END_LOCALE_SWITCH if (oldlocale) {\
go_setlocale(LC_ALL, oldlocale);\
g_free (oldlocale);}
  64
/*
 * Source_t: parsing state that is handed between the low level parse
 * functions.
 */
typedef struct {
        GStringChunk *chunk;   /* String chunk that parsed fields are inserted into */
        char const *position;  /* Indicates the current position within data */

        /* Used internally for fixed width parsing */
        int splitpos;          /* Indicates current position in splitpositions array */
        int linepos;           /* Position on the current line */
} Source_t;
  74
/*
 * Struct used for autodiscovery: a start/stop pair of integers.
 * NOTE(review): the code using it is not visible in this part of the file;
 * presumably these are positions within a line -- confirm against the user.
 */
typedef struct {
        int start;
        int stop;
} AutoDiscovery_t;
  80
  81 /*
  82  * Some silly dude make the length field an unsigned int.  C just does
  83  * not deal very well with that.
  84  */
  85 static inline int
  86 my_garray_len (GArray const *a)
  87 {
  88         return (int)a->len;
  89 }
  90
  91 static char *
  92 my_utf8_strchr (const char *p, gunichar uc)
  93 {
  94         return uc < 0x7f ? strchr (p, uc) : g_utf8_strchr (p, -1, uc);
  95 }
  96
  97 static int
  98 compare_terminator (char const *s, StfParseOptions_t *parseoptions)
  99 {
 100         guchar const *us = (guchar const *)s;
 101         GSList *l;
 102
 103         if (*us > parseoptions->compiled_terminator.max ||
 104             *us < parseoptions->compiled_terminator.min)
 105                 return 0;
 106
 107         for (l = parseoptions->terminator; l; l = l->next) {
 108                 char const *term = l->data;
 109                 char const *d = s;
 110
 111                 while (*term) {
 112                         if (*d != *term)
 113                                 goto next;
 114                         term++;
 115                         d++;
 116                 }
 117                 return d - s;
 118
 119         next:
 120                 ;
 121         }
 122         return 0;
 123 }
 124
 125
 126 /*******************************************************************************************************
 127  * STF PARSE OPTIONS : StfParseOptions related
 128  *******************************************************************************************************/
 129
 130 static void
 131 gnm_g_string_free (GString *s)
 132 {
 133         if (s) g_string_free (s, TRUE);
 134 }
 135
 136
 137 /**
 138  * stf_parse_options_new:
 139  *
 140  * This will return a new StfParseOptions_t struct.
 141  * The struct should, after being used, freed with stf_parse_options_free.
 142  **/
 143 static StfParseOptions_t *
 144 stf_parse_options_new (void)
 145 {
 146         StfParseOptions_t* parseoptions = g_new0 (StfParseOptions_t, 1);
 147
 148         parseoptions->parsetype   = PARSE_TYPE_NOTSET;
 149
 150         parseoptions->terminator  = NULL;
 151         stf_parse_options_add_line_terminator (parseoptions, "\r\n");
 152         stf_parse_options_add_line_terminator (parseoptions, "\n");
 153         stf_parse_options_add_line_terminator (parseoptions, "\r");
 154
 155         parseoptions->trim_spaces = (TRIM_TYPE_RIGHT | TRIM_TYPE_LEFT);
 156         parseoptions->locale = NULL;
 157
 158         parseoptions->splitpositions = NULL;
 159         stf_parse_options_fixed_splitpositions_clear (parseoptions);
 160
 161         parseoptions->stringindicator = '"';
 162         parseoptions->indicator_2x_is_single = TRUE;
 163         parseoptions->sep.duplicates = FALSE;
 164         parseoptions->trim_seps = FALSE;
 165
 166         parseoptions->sep.str = NULL;
 167         parseoptions->sep.chr = NULL;
 168
 169         parseoptions->col_autofit_array = NULL;
 170         parseoptions->col_import_array = NULL;
 171         parseoptions->col_import_array_len = 0;
 172         parseoptions->formats = g_ptr_array_new_with_free_func ((GDestroyNotify)go_format_unref);
 173         parseoptions->formats_decimal = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 174         parseoptions->formats_thousand = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 175         parseoptions->formats_curr = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 176
 177         parseoptions->cols_exceeded = FALSE;
 178         parseoptions->rows_exceeded = FALSE;
 179         parseoptions->ref_count = 1;
 180
 181         return parseoptions;
 182 }
 183
 184 /**
 185  * stf_parse_options_free:
 186  *
 187  * will free @parseoptions, note that this will not free the splitpositions
 188  * member (GArray) of the struct, the caller is responsible for that.
 189  **/
 190 void
 191 stf_parse_options_free (StfParseOptions_t *parseoptions)
 192 {
 193         g_return_if_fail (parseoptions != NULL);
 194
 195         if (parseoptions->ref_count-- > 1)
 196                 return;
 197
 198         g_free (parseoptions->col_import_array);
 199         g_free (parseoptions->col_autofit_array);
 200         g_free (parseoptions->locale);
 201         g_free (parseoptions->sep.chr);
 202
 203         if (parseoptions->sep.str) {
 204                 GSList *l;
 205
 206                 for (l = parseoptions->sep.str; l != NULL; l = l->next)
 207                         g_free ((char *) l->data);
 208                 g_slist_free (parseoptions->sep.str);
 209         }
 210
 211         g_array_free (parseoptions->splitpositions, TRUE);
 212
 213         stf_parse_options_clear_line_terminator (parseoptions);
 214
 215         g_ptr_array_free (parseoptions->formats, TRUE);
 216         g_ptr_array_free (parseoptions->formats_decimal, TRUE);
 217         g_ptr_array_free (parseoptions->formats_thousand, TRUE);
 218         g_ptr_array_free (parseoptions->formats_curr, TRUE);
 219
 220         g_free (parseoptions);
 221 }
 222
 223 static StfParseOptions_t *
 224 stf_parse_options_ref (StfParseOptions_t *parseoptions)
 225 {
 226         parseoptions->ref_count++;
 227         return parseoptions;
 228 }
 229
 230 GType
 231 stf_parse_options_get_type (void)
 232 {
 233         static GType t = 0;
 234
 235         if (t == 0) {
 236                 t = g_boxed_type_register_static ("StfParseOptions_t",
 237                          (GBoxedCopyFunc)stf_parse_options_ref,
 238                          (GBoxedFreeFunc)stf_parse_options_free);
 239         }
 240         return t;
 241 }
 242
 243 void
 244 stf_parse_options_set_type (StfParseOptions_t *parseoptions, StfParseType_t const parsetype)
 245 {
 246         g_return_if_fail (parseoptions != NULL);
 247         g_return_if_fail (parsetype == PARSE_TYPE_CSV || parsetype == PARSE_TYPE_FIXED);
 248
 249         parseoptions->parsetype = parsetype;
 250 }
 251
 252 static gint
 253 long_string_first (gchar const *a, gchar const *b)
 254 {
 255         /* This actually is UTF-8 safe.  */
 256         return strlen (b) - strlen (a);
 257 }
 258
 259 static void
 260 compile_terminators (StfParseOptions_t *parseoptions)
 261 {
 262         GSList *l;
 263         GO_SLIST_SORT (parseoptions->terminator, (GCompareFunc)long_string_first);
 264
 265         parseoptions->compiled_terminator.min = 255;
 266         parseoptions->compiled_terminator.max = 0;
 267         for (l = parseoptions->terminator; l; l = l->next) {
 268                 const guchar *term = l->data;
 269                 parseoptions->compiled_terminator.min =
 270                         MIN (parseoptions->compiled_terminator.min, *term);
 271                 parseoptions->compiled_terminator.max =
 272                         MAX (parseoptions->compiled_terminator.max, *term);
 273         }
 274 }
 275
 276 /**
 277  * stf_parse_options_add_line_terminator:
 278  *
 279  * This will add to the line terminators, in both the Fixed width and CSV delimited importers
 280  * this indicates the end of a row.
 281  *
 282  **/
 283 void
 284 stf_parse_options_add_line_terminator (StfParseOptions_t *parseoptions, char const *terminator)
 285 {
 286         g_return_if_fail (parseoptions != NULL);
 287         g_return_if_fail (terminator != NULL && *terminator != 0);
 288
 289         GO_SLIST_PREPEND (parseoptions->terminator, g_strdup (terminator));
 290         compile_terminators (parseoptions);
 291 }
 292
 293 /**
 294  * stf_parse_options_clear_line_terminator:
 295  *
 296  * This will clear the line terminator, in both the Fixed width and CSV delimited importers
 297  * this indicates the end of a row.
 298  *
 299  **/
 300 void
 301 stf_parse_options_clear_line_terminator (StfParseOptions_t *parseoptions)
 302 {
 303         g_return_if_fail (parseoptions != NULL);
 304
 305         g_slist_free_full (parseoptions->terminator, g_free);
 306         parseoptions->terminator = NULL;
 307         compile_terminators (parseoptions);
 308 }
 309
 310 /**
 311  * stf_parse_options_set_trim_spaces:
 312  *
 313  * If enabled will trim spaces in every parsed field on left and/or right
 314  * sides.
 315  **/
 316 void
 317 stf_parse_options_set_trim_spaces (StfParseOptions_t *parseoptions, StfTrimType_t const trim_spaces)
 318 {
 319         g_return_if_fail (parseoptions != NULL);
 320
 321         parseoptions->trim_spaces = trim_spaces;
 322 }
 323
 324 /**
 325  * stf_parse_options_csv_set_separators:
 326  * @parseoptions: #StfParseOptions_t
 327  * @character:
 328  * @string: (element-type char):
 329  *
 330  * A copy is made of the parameters.
 331  **/
 332 void
 333 stf_parse_options_csv_set_separators (StfParseOptions_t *parseoptions, char const *character,
 334                                       GSList const *string)
 335 {
 336         g_return_if_fail (parseoptions != NULL);
 337
 338         g_free (parseoptions->sep.chr);
 339         parseoptions->sep.chr = g_strdup (character);
 340
 341         g_slist_free_full (parseoptions->sep.str, g_free);
 342         parseoptions->sep.str = go_slist_map (string, (GOMapFunc)g_strdup);
 343 }
 344
 345 void
 346 stf_parse_options_csv_set_stringindicator (StfParseOptions_t *parseoptions, gunichar const stringindicator)
 347 {
 348         g_return_if_fail (parseoptions != NULL);
 349
 350         parseoptions->stringindicator = stringindicator;
 351 }
 352
 353 /**
 354  * stf_parse_options_csv_set_indicator_2x_is_single:
 355  * @indic_2x: a boolean value indicating whether we want to see two
 356  *              adjacent string indicators as a single string indicator
 357  *              that is part of the cell, rather than a terminator.
 358  **/
 359 void
 360 stf_parse_options_csv_set_indicator_2x_is_single (StfParseOptions_t *parseoptions,
 361                                                   gboolean const indic_2x)
 362 {
 363         g_return_if_fail (parseoptions != NULL);
 364
 365         parseoptions->indicator_2x_is_single = indic_2x;
 366 }
 367
 368 /**
 369  * stf_parse_options_csv_set_duplicates:
 370  * @parseoptions:
 371  * @duplicates: a boolean value indicating whether we want to see two
 372  *               separators right behind each other as one
 373  **/
 374 void
 375 stf_parse_options_csv_set_duplicates (StfParseOptions_t *parseoptions, gboolean const duplicates)
 376 {
 377         g_return_if_fail (parseoptions != NULL);
 378
 379         parseoptions->sep.duplicates = duplicates;
 380 }
 381
 382 /**
 383  * stf_parse_options_csv_set_trim_seps:
 384  * @trim_seps: a boolean value indicating whether we want to ignore
 385  *               separators at the beginning of lines
 386  **/
 387 void
 388 stf_parse_options_csv_set_trim_seps (StfParseOptions_t *parseoptions, gboolean const trim_seps)
 389 {
 390         g_return_if_fail (parseoptions != NULL);
 391
 392         parseoptions->trim_seps = trim_seps;
 393 }
 394
 395 /**
 396  * stf_parse_options_fixed_splitpositions_clear:
 397  *
 398  * This will clear the splitpositions (== points on which a line is split)
 399  **/
 400 void
 401 stf_parse_options_fixed_splitpositions_clear (StfParseOptions_t *parseoptions)
 402 {
 403         int minus_one = -1;
 404         g_return_if_fail (parseoptions != NULL);
 405
 406         if (parseoptions->splitpositions)
 407                 g_array_free (parseoptions->splitpositions, TRUE);
 408         parseoptions->splitpositions = g_array_new (FALSE, FALSE, sizeof (int));
 409
 410         g_array_append_val (parseoptions->splitpositions, minus_one);
 411 }
 412
 413 /**
 414  * stf_parse_options_fixed_splitpositions_add:
 415  *
 416  * @position will be added to the splitpositions.
 417  **/
 418 void
 419 stf_parse_options_fixed_splitpositions_add (StfParseOptions_t *parseoptions, int position)
 420 {
 421         unsigned int ui;
 422
 423         g_return_if_fail (parseoptions != NULL);
 424         g_return_if_fail (position >= 0);
 425
 426         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 427                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 428                 if (position == here)
 429                         return;
 430                 if (position < here)
 431                         break;
 432         }
 433
 434         g_array_insert_val (parseoptions->splitpositions, ui, position);
 435 }
 436
 437 void
 438 stf_parse_options_fixed_splitpositions_remove (StfParseOptions_t *parseoptions, int position)
 439 {
 440         unsigned int ui;
 441
 442         g_return_if_fail (parseoptions != NULL);
 443         g_return_if_fail (position >= 0);
 444
 445         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 446                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 447                 if (position == here)
 448                         g_array_remove_index (parseoptions->splitpositions, ui);
 449                 if (position <= here)
 450                         return;
 451         }
 452 }
 453
 454 int
 455 stf_parse_options_fixed_splitpositions_count (StfParseOptions_t *parseoptions)
 456 {
 457         return parseoptions->splitpositions->len;
 458 }
 459
 460 int
 461 stf_parse_options_fixed_splitpositions_nth (StfParseOptions_t *parseoptions, int n)
 462 {
 463         return g_array_index (parseoptions->splitpositions, int, n);
 464 }
 465
 466
 467 /**
 468  * stf_parse_options_valid:
 469  * @parseoptions: an import options struct
 470  *
 471  * Checks if @parseoptions is correctly filled
 472  *
 473  * returns : TRUE if it is correctly filled, FALSE otherwise.
 474  **/
 475 static gboolean
 476 stf_parse_options_valid (StfParseOptions_t *parseoptions)
 477 {
 478         g_return_val_if_fail (parseoptions != NULL, FALSE);
 479
 480         if (parseoptions->parsetype == PARSE_TYPE_FIXED) {
 481                 if (!parseoptions->splitpositions) {
 482                         g_warning ("STF: No splitpositions in struct");
 483                         return FALSE;
 484                 }
 485         }
 486
 487         return TRUE;
 488 }
 489
 490 /*******************************************************************************************************
 491  * STF PARSE : The actual routines that do the 'trick'
 492  *******************************************************************************************************/
 493
 494 static void
 495 trim_spaces_inplace (char *field, StfParseOptions_t const *parseoptions)
 496 {
 497         if (!field) return;
 498
 499         if (parseoptions->trim_spaces & TRIM_TYPE_LEFT) {
 500                 char *s = field;
 501
 502                 while (g_unichar_isspace (g_utf8_get_char (s)))
 503                         s = g_utf8_next_char (s);
 504
 505                 if (s != field)
 506                         memmove (field, s, 1 + strlen (s));
 507         }
 508
 509         if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 510                 char *s = field + strlen (field);
 511
 512                 while (field != s) {
 513                         s = g_utf8_prev_char (s);
 514                         if (!g_unichar_isspace (g_utf8_get_char (s)))
 515                                 break;
 516                         *s = 0;
 517                 }
 518         }
 519 }
 520
 521 /**
 522  * stf_parse_csv_is_separator:
 523  *
 524  * returns NULL if @character is not a separator, a pointer to the character
 525  * after the separator otherwise.
 526  **/
 527 static char const *
 528 stf_parse_csv_is_separator (char const *character, char const *chr, GSList const *str)
 529 {
 530         g_return_val_if_fail (character != NULL, NULL);
 531
 532         if (*character == 0)
 533                 return NULL;
 534
 535         if (str) {
 536                 GSList const *l;
 537
 538                 for (l = str; l != NULL; l = l->next) {
 539                         char const *s = l->data;
 540                         char const *r;
 541                         glong cnt;
 542                         glong const len = g_utf8_strlen (s, -1);
 543
 544                         /* Don't compare past the end of the buffer! */
 545                         for (r = character, cnt = 0; cnt < len; cnt++, r = g_utf8_next_char (r))
 546                                 if (*r == '\0')
 547                                         break;
 548
 549                         if ((cnt == len) && (memcmp (character, s, len) == 0))
 550                                 return g_utf8_offset_to_pointer (character, len);
 551                 }
 552         }
 553
 554         if (chr && my_utf8_strchr (chr, g_utf8_get_char (character)))
 555                 return g_utf8_next_char(character);
 556
 557         return NULL;
 558 }
 559
 560 /*
 561  * stf_parse_eat_separators:
 562  *
 563  * skip over leading separators
 564  *
 565  */
 566
 567 static void
 568 stf_parse_eat_separators (Source_t *src, StfParseOptions_t *parseoptions)
 569 {
 570         char const *cur, *next;
 571
 572         g_return_if_fail (src != NULL);
 573         g_return_if_fail (parseoptions != NULL);
 574
 575         cur = src->position;
 576
 577         if (*cur == '\0' || compare_terminator (cur, parseoptions))
 578                 return;
 579         while ((next = stf_parse_csv_is_separator (cur, parseoptions->sep.chr, parseoptions->sep.str)))
 580                 cur = next;
 581         src->position = cur;
 582         return;
 583 }
 584
 585
/* Outcome of parsing one CSV cell, as reported by stf_parse_csv_cell.  */
typedef enum {
        STF_CELL_ERROR,         /* An argument check failed */
        STF_CELL_EOF,           /* End of data reached before a field started */
        STF_CELL_EOL,           /* A line terminator was found and consumed */
        STF_CELL_FIELD_NO_SEP,  /* A field was read; no separator followed it */
        STF_CELL_FIELD_SEP      /* A field was read and its separator consumed */
} StfParseCellRes;
 593
/*
 * stf_parse_csv_cell:
 * @text: string that the field content is appended to
 * @src: parse state; src->position is advanced past what was consumed
 * @parseoptions: the options in effect
 *
 * Parses one CSV cell starting at src->position.
 *
 * Returns STF_CELL_EOF if the data ends before a field starts and
 * STF_CELL_EOL if a line terminator is found there (the terminator is
 * consumed).  Otherwise a quoted or unquoted field is read into @text and
 * the result is STF_CELL_FIELD_SEP if a separator followed the field (it
 * is consumed) or STF_CELL_FIELD_NO_SEP if not.  STF_CELL_ERROR is
 * returned when an argument check fails.
 */
static StfParseCellRes
stf_parse_csv_cell (GString *text, Source_t *src, StfParseOptions_t *parseoptions)
{
        char const *cur;
        gboolean saw_sep = FALSE;

        g_return_val_if_fail (src != NULL, STF_CELL_ERROR);
        g_return_val_if_fail (parseoptions != NULL, STF_CELL_ERROR);

        cur = src->position;
        g_return_val_if_fail (cur != NULL, STF_CELL_ERROR);

        /* Skip whitespace, but stop at line terminators.  */
        while (1) {
                int term_len;

                if (*cur == 0) {
                        src->position = cur;
                        return STF_CELL_EOF;
                }

                term_len = compare_terminator (cur, parseoptions);
                if (term_len) {
                        src->position = cur + term_len;
                        return STF_CELL_EOL;
                }

                /* Without left trimming, leading white space is content.  */
                if ((parseoptions->trim_spaces & TRIM_TYPE_LEFT) == 0)
                        break;

                /* A white space character may itself be a separator; do
                 * not skip over it in that case.  */
                if (stf_parse_csv_is_separator (cur, parseoptions->sep.chr,
                                                parseoptions->sep.str))
                        break;

                if (!g_unichar_isspace (g_utf8_get_char (cur)))
                        break;
                cur = g_utf8_next_char (cur);
        }

        if (parseoptions->stringindicator != 0 &&
            g_utf8_get_char (cur) == parseoptions->stringindicator) {
                /* Quoted field: step over the opening indicator.  */
                cur = g_utf8_next_char (cur);
                while (*cur) {
                        gunichar uc = g_utf8_get_char (cur);
                        cur = g_utf8_next_char (cur);

                        if (uc == parseoptions->stringindicator) {
                                if (parseoptions->indicator_2x_is_single &&
                                    g_utf8_get_char (cur) == parseoptions->stringindicator)
                                        /* Doubled indicator: skip the second
                                         * one; the first is appended below.  */
                                        cur = g_utf8_next_char (cur);
                                else {
                                        /* "field content"dropped-garbage,  */
                                        while (*cur && !compare_terminator (cur, parseoptions)) {
                                                char const *post = stf_parse_csv_is_separator
                                                        (cur, parseoptions->sep.chr, parseoptions->sep.str);
                                                if (post) {
                                                        cur = post;
                                                        saw_sep = TRUE;
                                                        break;
                                                }
                                                cur = g_utf8_next_char (cur);
                                        }
                                        /* The closing indicator ends the field.  */
                                        break;
                                }
                        }

                        g_string_append_unichar (text, uc);
                }

                /* We silently allow a missing terminating quote.  */
        } else {
                /* Unquoted field.  */

                /* Collect characters up to a separator, a line terminator
                 * or the end of the data.  */
                while (*cur && !compare_terminator (cur, parseoptions)) {

                        char const *post = stf_parse_csv_is_separator
                                (cur, parseoptions->sep.chr, parseoptions->sep.str);
                        if (post) {
                                cur = post;
                                saw_sep = TRUE;
                                break;
                        }

                        g_string_append_unichar (text, g_utf8_get_char (cur));
                        cur = g_utf8_next_char (cur);
                }

                /* Drop trailing white space, one character at a time.  */
                if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
                        while (text->len) {
                                const char *last = g_utf8_prev_char (text->str + text->len);
                                if (!g_unichar_isspace (g_utf8_get_char (last)))
                                        break;
                                g_string_truncate (text, last - text->str);
                        }
                }
        }

        src->position = cur;

        /* When duplicates count as one, swallow the separators that follow.  */
        if (saw_sep && parseoptions->sep.duplicates)
                stf_parse_eat_separators (src, parseoptions);

        return saw_sep ? STF_CELL_FIELD_SEP : STF_CELL_FIELD_NO_SEP;
}
 698
 699 /**
 700  * stf_parse_csv_line:
 701  *
 702  * This will parse one line from the current @src->position.
 703  * NOTE: The calling routine is responsible for freeing the result.
 704  *
 705  * returns : a GPtrArray of char*'s
 706  **/
 707 static GPtrArray *
 708 stf_parse_csv_line (Source_t *src, StfParseOptions_t *parseoptions)
 709 {
 710         GPtrArray *line;
 711         gboolean cont = FALSE;
 712         GString *text;
 713
 714         g_return_val_if_fail (src != NULL, NULL);
 715         g_return_val_if_fail (parseoptions != NULL, NULL);
 716
 717         line = g_ptr_array_new ();
 718         if (parseoptions->trim_seps)
 719                 stf_parse_eat_separators (src, parseoptions);
 720
 721         text = g_string_sized_new (30);
 722
 723         while (1) {
 724                 char *ctext;
 725                 StfParseCellRes res =
 726                         stf_parse_csv_cell (text, src, parseoptions);
 727                 trim_spaces_inplace (text->str, parseoptions);
 728                 ctext = g_string_chunk_insert_len (src->chunk,
 729                                                    text->str, text->len);
 730                 g_string_truncate (text, 0);
 731
 732                 switch (res) {
 733                 case STF_CELL_FIELD_NO_SEP:
 734                         g_ptr_array_add (line, ctext);
 735                         cont = FALSE;
 736                         break;
 737
 738                 case STF_CELL_FIELD_SEP:
 739                         g_ptr_array_add (line, ctext);
 740                         cont = TRUE;  /* Make sure we see one more field.  */
 741                         break;
 742
 743                 default:
 744                         if (cont)
 745                                 g_ptr_array_add (line, ctext);
 746                         g_string_free (text, TRUE);
 747                         return line;
 748                 }
 749         }
 750 }
 751
 752 /**
 753  * stf_parse_fixed_cell:
 754  *
 755  * returns a pointer to the parsed cell contents.
 756  **/
 757 static char *
 758 stf_parse_fixed_cell (Source_t *src, StfParseOptions_t *parseoptions)
 759 {
 760         char *res;
 761         char const *cur;
 762         int splitval;
 763
 764         g_return_val_if_fail (src != NULL, NULL);
 765         g_return_val_if_fail (parseoptions != NULL, NULL);
 766
 767         cur = src->position;
 768
 769         if (src->splitpos < my_garray_len (parseoptions->splitpositions))
 770                 splitval = (int) g_array_index (parseoptions->splitpositions, int, src->splitpos);
 771         else
 772                 splitval = -1;
 773
 774         while (*cur != 0 && !compare_terminator (cur, parseoptions) && splitval != src->linepos) {
 775                 src->linepos++;
 776                 cur = g_utf8_next_char (cur);
 777         }
 778
 779         res = g_string_chunk_insert_len (src->chunk,
 780                                          src->position,
 781                                          cur - src->position);
 782
 783         src->position = cur;
 784
 785         return res;
 786 }
 787
 788 /**
 789  * stf_parse_fixed_line:
 790  *
 791  * This will parse one line from the current @src->position.
 792  * It will return a GPtrArray with the cell contents as strings.
 793
 794  * NOTE: The calling routine is responsible for freeing result.
 795  **/
 796 static GPtrArray *
 797 stf_parse_fixed_line (Source_t *src, StfParseOptions_t *parseoptions)
 798 {
 799         GPtrArray *line;
 800
 801         g_return_val_if_fail (src != NULL, NULL);
 802         g_return_val_if_fail (parseoptions != NULL, NULL);
 803
 804         src->linepos = 0;
 805         src->splitpos = 0;
 806
 807         line = g_ptr_array_new ();
 808         while (*src->position != '\0' && !compare_terminator (src->position, parseoptions)) {
 809                 char *field = stf_parse_fixed_cell (src, parseoptions);
 810
 811                 trim_spaces_inplace (field, parseoptions);
 812                 g_ptr_array_add (line, field);
 813
 814                 src->splitpos++;
 815         }
 816
 817         while (line->len < parseoptions->splitpositions->len)
 818                 g_ptr_array_add (line, g_strdup (""));
 819
 820         return line;
 821 }
 822
 823 /**
 824  * stf_parse_general_free: (skip)
 825  */
 826 void
 827 stf_parse_general_free (GPtrArray *lines)
 828 {
 829         unsigned lineno;
 830         for (lineno = 0; lineno < lines->len; lineno++) {
 831                 GPtrArray *line = g_ptr_array_index (lines, lineno);
 832                 /* Fields are not freed here.  */
 833                 if (line)
 834                         g_ptr_array_free (line, TRUE);
 835         }
 836         g_ptr_array_free (lines, TRUE);
 837 }
 838
 839
 840 /**
 841  * stf_parse_general: (skip)
 842  *
 843  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 844  * GPtrArray of strings.
 845  *
 846  * The caller must free this entire structure, for example by calling
 847  * stf_parse_general_free.
 848  **/
 849 GPtrArray *
 850 stf_parse_general (StfParseOptions_t *parseoptions,
 851                    GStringChunk *lines_chunk,
 852                    char const *data, char const *data_end)
 853 {
 854         GPtrArray *lines;
 855         Source_t src;
 856         int row;
 857         char const *valid_end = data_end;
 858
 859         g_return_val_if_fail (parseoptions != NULL, NULL);
 860         g_return_val_if_fail (data != NULL, NULL);
 861         g_return_val_if_fail (data_end != NULL, NULL);
 862         g_return_val_if_fail (stf_parse_options_valid (parseoptions), NULL);
 863         g_return_val_if_fail (g_utf8_validate (data, data_end-data, &valid_end), NULL);
 864
 865         src.chunk = lines_chunk;
 866         src.position = data;
 867         row = 0;
 868
 869         if ((data_end-data >= 3) && !strncmp(src.position, "\xEF\xBB\xBF", 3)) {
 870                 /* Skip over byte-order mark */
 871                 src.position += 3;
 872         }
 873
 874         lines = g_ptr_array_new ();
 875         while (*src.position != '\0' && src.position < data_end) {
 876                 GPtrArray *line;
 877
 878                 if (row == GNM_MAX_ROWS) {
 879                         parseoptions->rows_exceeded = TRUE;
 880                         break;
 881                 }
 882
 883                 line = parseoptions->parsetype == PARSE_TYPE_CSV
 884                         ? stf_parse_csv_line (&src, parseoptions)
 885                         : stf_parse_fixed_line (&src, parseoptions);
 886
 887                 g_ptr_array_add (lines, line);
 888                 if (parseoptions->parsetype != PARSE_TYPE_CSV)
 889                         src.position += compare_terminator (src.position, parseoptions);
 890                 row++;
 891         }
 892
 893         return lines;
 894 }
 895
 896 /**
 897  * stf_parse_lines: (skip)
 898  * @parseoptions: #StfParseOptions_t
 899  * @lines_chunk:
 900  * @data:
 901  * @maxlines:
 902  * @with_lineno:
 903  *
 904  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 905  * GPtrArray of strings.
 906  *
 907  * The caller must free this entire structure, for example by calling
 908  * stf_parse_general_free.
 909  **/
 910 GPtrArray *
 911 stf_parse_lines (StfParseOptions_t *parseoptions,
 912                  GStringChunk *lines_chunk,
 913                  char const *data,
 914                  int maxlines, gboolean with_lineno)
 915 {
 916         GPtrArray *lines;
 917         int lineno = 1;
 918
 919         g_return_val_if_fail (data != NULL, NULL);
 920
 921         lines = g_ptr_array_new ();
 922         while (*data) {
 923                 char const *data0 = data;
 924                 GPtrArray *line = g_ptr_array_new ();
 925
 926                 if (with_lineno) {
 927                         char buf[4 * sizeof (int)];
 928                         sprintf (buf, "%d", lineno);
 929                         g_ptr_array_add (line,
 930                                          g_string_chunk_insert (lines_chunk, buf));
 931                 }
 932
 933                 while (1) {
 934                         int termlen = compare_terminator (data, parseoptions);
 935                         if (termlen > 0 || *data == 0) {
 936                                 g_ptr_array_add (line,
 937                                                  g_string_chunk_insert_len (lines_chunk,
 938                                                                             data0,
 939                                                                             data - data0));
 940                                 data += termlen;
 941                                 break;
 942                         } else
 943                                 data = g_utf8_next_char (data);
 944                 }
 945
 946                 g_ptr_array_add (lines, line);
 947
 948                 lineno++;
 949                 if (lineno >= maxlines)
 950                         break;
 951         }
 952         return lines;
 953 }
 954
 955 char const *
 956 stf_parse_find_line (StfParseOptions_t *parseoptions,
 957                      char const *data,
 958                      int line)
 959 {
 960         while (line > 0) {
 961                 int termlen = compare_terminator (data, parseoptions);
 962                 if (termlen > 0) {
 963                         data += termlen;
 964                         line--;
 965                 } else if (*data == 0) {
 966                         return data;
 967                 } else {
 968                         data = g_utf8_next_char (data);
 969                 }
 970         }
 971         return data;
 972 }
 973
 974
 975 /**
 976  * stf_parse_options_fixed_autodiscover:
 977  * @parseoptions: a Parse options struct.
 978  * @data: The actual data.
 979  * @data_end: data end.
 980  *
 981  * Automatically try to discover columns in the text to be parsed.
 982  * We ignore empty lines (only containing parseoptions->terminator)
 983  *
 984  * FIXME: This is so extremely ugly that I am too tired to rewrite it right now.
 985  *        Think hard of a better more flexible solution...
 986  **/
 987 void
 988 stf_parse_options_fixed_autodiscover (StfParseOptions_t *parseoptions,
 989                                       char const *data, char const *data_end)
 990 {
 991         char const *iterator = data;
 992         GSList *list = NULL;
 993         GSList *list_start = NULL;
 994         int lines = 0;
 995         int effective_lines = 0;
 996         int max_line_length = 0;
 997         int *line_begin_hits = NULL;
 998         int *line_end_hits = NULL;
 999         int i;
1000
1001         stf_parse_options_fixed_splitpositions_clear (parseoptions);
1002
1003         /*
1004          * First take a look at all possible white space combinations
1005          */
1006         while (*iterator && iterator < data_end) {
1007                 gboolean begin_recorded = FALSE;
1008                 AutoDiscovery_t *disc = NULL;
1009                 int position = 0;
1010                 int termlen = 0;
1011
1012                 while (*iterator && (termlen = compare_terminator (iterator, parseoptions)) == 0) {
1013                         if (!begin_recorded && *iterator == ' ') {
1014                                 disc = g_new0 (AutoDiscovery_t, 1);
1015
1016                                 disc->start = position;
1017
1018                                 begin_recorded = TRUE;
1019                         } else if (begin_recorded && *iterator != ' ') {
1020                                 disc->stop = position;
1021                                 list = g_slist_prepend (list, disc);
1022
1023                                 begin_recorded = FALSE;
1024                                 disc = NULL;
1025                         }
1026
1027                         position++;
1028                         iterator++;
1029                 }
1030
1031                 if (position > max_line_length)
1032                         max_line_length = position;
1033
1034                 /*
1035                  * If there are excess spaces at the end of
1036                  * the line : ignore them
1037                  */
1038                 g_free (disc);
1039
1040                 /*
1041                  * Hop over the terminator
1042                  */
1043                 iterator += termlen;
1044
1045                 if (position != 0)
1046                         effective_lines++;
1047
1048                 lines++;
1049         }
1050
1051         list       = g_slist_reverse (list);
1052         list_start = list;
1053
1054         /*
1055          * Kewl stuff :
1056          * Look at the number of hits at each line position
1057          * if the number of hits equals the number of lines
1058          * we can be pretty sure this is the start or end
1059          * of a column, we filter out empty columns
1060          * later
1061          */
1062         line_begin_hits = g_new0 (int, max_line_length + 1);
1063         line_end_hits   = g_new0 (int, max_line_length + 1);
1064
1065         while (list) {
1066                 AutoDiscovery_t *disc = list->data;
1067
1068                 line_begin_hits[disc->start]++;
1069                 line_end_hits[disc->stop]++;
1070
1071                 g_free (disc);
1072
1073                 list = g_slist_next (list);
1074         }
1075         g_slist_free (list_start);
1076
1077         for (i = 0; i < max_line_length + 1; i++)
1078                 if (line_begin_hits[i] == effective_lines || line_end_hits[i] == effective_lines)
1079                         stf_parse_options_fixed_splitpositions_add (parseoptions, i);
1080
1081         /*
1082          * Do some corrections to the initial columns
1083          * detected here, we obviously don't need to
1084          * do this if there are no columns at all.
1085          */
1086         if (my_garray_len (parseoptions->splitpositions) > 0) {
1087                 /*
1088                  * Try to find columns that look like :
1089                  *
1090                  * Example     100
1091                  * Example2      9
1092                  *
1093                  * (In other words : Columns with left & right justification with
1094                  *  a minimum of 2 spaces in the middle)
1095                  * Split these columns in 2
1096                  */
1097
1098                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1099                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1100                         int end   = g_array_index (parseoptions->splitpositions, int, i + 1);
1101                         int num_spaces   = -1;
1102                         int spaces_start = 0;
1103                         gboolean right_aligned = TRUE;
1104                         gboolean left_aligned  = TRUE;
1105                         gboolean has_2_spaces  = TRUE;
1106
1107                         iterator = data;
1108                         lines = 0;
1109                         while (*iterator && iterator < data_end) {
1110                                 gboolean trigger = FALSE;
1111                                 gboolean space_trigger = FALSE;
1112                                 int pos = 0;
1113
1114                                 num_spaces   = -1;
1115                                 spaces_start = 0;
1116                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1117                                         if (pos == begin) {
1118                                                 if (*iterator == ' ')
1119                                                         left_aligned = FALSE;
1120
1121                                                 trigger = TRUE;
1122                                         } else if (pos == end - 1) {
1123                                                 if (*iterator == ' ')
1124                                                         right_aligned = FALSE;
1125
1126                                                 trigger = FALSE;
1127                                         }
1128
1129                                         if (trigger || pos == end - 1) {
1130                                                 if (!space_trigger && *iterator == ' ') {
1131                                                         space_trigger = TRUE;
1132                                                         spaces_start = pos;
1133                                                 } else if (space_trigger && *iterator != ' ') {
1134                                                         space_trigger = FALSE;
1135                                                         num_spaces = pos - spaces_start;
1136                                                 }
1137                                         }
1138
1139                                         iterator++;
1140                                         pos++;
1141                                 }
1142
1143                                 if (num_spaces < 2)
1144                                         has_2_spaces = FALSE;
1145
1146                                 if (*iterator)
1147                                         iterator++;
1148
1149                                 lines++;
1150                         }
1151
1152                         /*
1153                          * If this column meets all the criteria
1154                          * split it into two at the last measured
1155                          * spaces_start + num_spaces
1156                          */
1157                         if (has_2_spaces && right_aligned && left_aligned) {
1158                                 int val = (((spaces_start + num_spaces) - spaces_start) / 2) + spaces_start;
1159
1160                                 g_array_insert_val (parseoptions->splitpositions, i + 1, val);
1161
1162                                 /*
1163                                  * Skip over the inserted column
1164                                  */
1165                                 i++;
1166                         }
1167                 }
1168
1169                 /*
1170                  * Remove empty columns here if needed
1171                  */
1172                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1173                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1174                         int end = g_array_index (parseoptions->splitpositions, int, i + 1);
1175                         gboolean only_spaces = TRUE;
1176
1177                         iterator = data;
1178                         lines = 0;
1179                         while (*iterator && iterator < data_end) {
1180                                 gboolean trigger = FALSE;
1181                                 int pos = 0;
1182
1183                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1184                                         if (pos == begin)
1185                                                 trigger = TRUE;
1186                                         else if (pos == end)
1187                                                 trigger = FALSE;
1188
1189                                         if (trigger) {
1190                                                 if (*iterator != ' ')
1191                                                         only_spaces = FALSE;
1192                                         }
1193
1194                                         iterator++;
1195                                         pos++;
1196                                 }
1197
1198                                 if (*iterator)
1199                                         iterator++;
1200
1201                                 lines++;
1202                         }
1203
1204                         /*
1205                          * The column only contains spaces
1206                          * remove it
1207                          */
1208                         if (only_spaces) {
1209                                 g_array_remove_index (parseoptions->splitpositions, i);
1210
1211                                 /*
1212                                  * We HAVE to make sure that the next column (end) also
1213                                  * gets checked out. If we don't decrease "i" here, we
1214                                  * will skip over it as the indexes shift down after
1215                                  * the removal
1216                                  */
1217                                 i--;
1218                         }
1219                 }
1220         }
1221
1222         g_free (line_begin_hits);
1223         g_free (line_end_hits);
1224 }
1225
1226 /*******************************************************************************************************
1227  * STF PARSE HL: high-level functions that dump the raw data returned by the low-level parsing
1228  *               functions into something meaningful (== application specific)
1229  *******************************************************************************************************/
1230
1231 /*
1232  * This is more or less as gnm_cell_set_text, except...
1233  * 1. Unknown names are not allowed.
1234  * 2. Only '=' can start an expression.
1235  */
1236
1237 static void
1238 stf_cell_set_text (GnmCell *cell, char const *text)
1239 {
1240         GnmExprTop const *texpr;
1241         GnmValue *val;
1242         GOFormat const *fmt = gnm_style_get_format (gnm_cell_get_style (cell));
1243         const GODateConventions *date_conv =
1244                 workbook_date_conv (cell->base.sheet->workbook);
1245
1246         if (!go_format_is_text (fmt) && *text == '=' && text[1] != 0) {
1247                 GnmExprParseFlags flags =
1248                         GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID;
1249                 const char *expr_start = text + 1;
1250                 GnmParsePos pos;
1251                 val = NULL;
1252                 parse_pos_init_cell (&pos, cell);
1253                 texpr = gnm_expr_parse_str (expr_start, &pos, flags,
1254                                             NULL, NULL);
1255         } else {
1256                 texpr = NULL;
1257                 val = format_match (text, fmt, date_conv);
1258         }
1259
1260         if (!val && !texpr)
1261                 val = value_new_string (text);
1262
1263         if (val)
1264                 gnm_cell_set_value (cell, val);
1265         else {
1266                 gnm_cell_set_expr (cell, texpr);
1267                 gnm_expr_top_unref (texpr);
1268         }
1269 }
1270
1271 static void
1272 stf_read_remember_settings (Workbook *book, StfParseOptions_t *po)
1273 {
1274         if (po->parsetype == PARSE_TYPE_CSV) {
1275                 GnmStfExport *stfe = gnm_stf_get_stfe (G_OBJECT (book));
1276                 char quote[6];
1277                 int length = g_unichar_to_utf8 (po->stringindicator, quote);
1278                 if (length > 5) {
1279                         quote[0] = '"';
1280                         quote[1] = '\0';
1281                 } else quote[length] = '\0';
1282
1283                 g_object_set (G_OBJECT (stfe), "separator", po->sep.chr, "quote", &quote, NULL);
1284
1285                 if ((po->terminator != NULL) &&  (po->terminator->data != NULL))
1286                         g_object_set (G_OBJECT (stfe), "eol", po->terminator->data, NULL);
1287         }
1288 }
1289
1290 gboolean
1291 stf_parse_sheet (StfParseOptions_t *parseoptions,
1292                  char const *data, char const *data_end,
1293                  Sheet *sheet, int start_col, int start_row)
1294 {
1295         int row;
1296         unsigned int lrow;
1297         GStringChunk *lines_chunk;
1298         GPtrArray *lines;
1299         gboolean result = TRUE;
1300         int col;
1301         unsigned int lcol;
1302
1303         SETUP_LOCALE_SWITCH;
1304
1305         g_return_val_if_fail (parseoptions != NULL, FALSE);
1306         g_return_val_if_fail (data != NULL, FALSE);
1307         g_return_val_if_fail (IS_SHEET (sheet), FALSE);
1308
1309         if (!data_end)
1310                 data_end = data + strlen (data);
1311
1312         lines_chunk = g_string_chunk_new (100 * 1024);
1313         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1314         if (lines == NULL)
1315                 result = FALSE;
1316
1317         col = start_col;
1318         for (lcol = 0; lcol < parseoptions->formats->len; lcol++) {
1319                 GOFormat const *fmt = g_ptr_array_index (parseoptions->formats, lcol);
1320                 GnmStyle *mstyle;
1321                 gboolean want_col =
1322                         (parseoptions->col_import_array == NULL ||
1323                          parseoptions->col_import_array_len <= lcol ||
1324                          parseoptions->col_import_array[lcol]);
1325                 if (!want_col || col >= gnm_sheet_get_max_cols (sheet))
1326                         continue;
1327
1328                 if (fmt && !go_format_is_general (fmt)) {
1329                         GnmRange r;
1330                         int end_row = MIN (start_row + (int)lines->len - 1,
1331                                            gnm_sheet_get_last_row (sheet));
1332
1333                         range_init (&r, col, start_row, col, end_row);
1334                         mstyle = gnm_style_new ();
1335                         gnm_style_set_format (mstyle, fmt);
1336                         sheet_apply_style (sheet, &r, mstyle);
1337                 }
1338                 col++;
1339         }
1340
1341         START_LOCALE_SWITCH;
1342         for (row = start_row, lrow = 0;
1343              result && lrow < lines->len;
1344              row++, lrow++) {
1345                 GPtrArray *line;
1346
1347                 if (row >= gnm_sheet_get_max_rows (sheet)) {
1348                         if (!parseoptions->rows_exceeded) {
1349                                 /* FIXME: What locale?  */
1350                                 g_warning (_("There are more rows of data than "
1351                                              "there is room for in the sheet.  Extra "
1352                                              "rows will be ignored."));
1353                                 parseoptions->rows_exceeded = TRUE;
1354                         }
1355                         break;
1356                 }
1357
1358                 col = start_col;
1359                 line = g_ptr_array_index (lines, lrow);
1360
1361                 for (lcol = 0; lcol < line->len; lcol++) {
1362                         GOFormat const *fmt = g_ptr_array_index (parseoptions->formats, lcol);
1363                         char const *text = g_ptr_array_index (line, lcol);
1364                         gboolean want_col =
1365                                 (parseoptions->col_import_array == NULL ||
1366                                  parseoptions->col_import_array_len <= lcol ||
1367                                  parseoptions->col_import_array[lcol]);
1368                         if (!want_col)
1369                                 continue;
1370
1371                         if (col >= gnm_sheet_get_max_cols (sheet)) {
1372                                 if (!parseoptions->cols_exceeded) {
1373                                         /* FIXME: What locale?  */
1374                                         g_warning (_("There are more columns of data than "
1375                                                      "there is room for in the sheet.  Extra "
1376                                                      "columns will be ignored."));
1377                                         parseoptions->cols_exceeded = TRUE;
1378                                 }
1379                                 break;
1380                         }
1381                         if (text && *text) {
1382                                 GnmCell *cell = sheet_cell_fetch (sheet, col, row);
1383                                 if (!go_format_is_text (fmt) &&
1384                                     lcol < parseoptions->formats_decimal->len &&
1385                                     g_ptr_array_index (parseoptions->formats_decimal, lcol)) {
1386                                         GOFormatFamily fam;
1387                                         GnmValue *v = format_match_decimal_number_with_locale
1388                                                 (text, &fam,
1389                                                  g_ptr_array_index (parseoptions->formats_curr, lcol),
1390                                                  g_ptr_array_index (parseoptions->formats_thousand, lcol),
1391                                                  g_ptr_array_index (parseoptions->formats_decimal, lcol));
1392                                         if (!v)
1393                                                 v = value_new_string (text);
1394                                         sheet_cell_set_value (cell, v);
1395                                 } else {
1396
1397                                         stf_cell_set_text (cell, text);
1398                                 }
1399                         }
1400                         col++;
1401                 }
1402
1403                 g_ptr_array_index (lines, lrow) = NULL;
1404                 g_ptr_array_free (line, TRUE);
1405         }
1406         END_LOCALE_SWITCH;
1407
1408         for (lcol = 0, col = start_col;
1409              lcol < parseoptions->col_import_array_len  && col < gnm_sheet_get_max_cols (sheet);
1410              lcol++) {
1411                 if (parseoptions->col_import_array == NULL ||
1412                     parseoptions->col_import_array_len <= lcol ||
1413                     parseoptions->col_import_array[lcol]) {
1414                         if (parseoptions->col_autofit_array == NULL ||
1415                             parseoptions->col_autofit_array[lcol]) {
1416                                 ColRowIndexList *list = colrow_get_index_list (col, col, NULL);
1417                                 ColRowStateGroup  *state = colrow_set_sizes (sheet, TRUE, list, -1, 0, -1);
1418                                 colrow_index_list_destroy (list);
1419                                 g_slist_free (state);
1420                         }
1421                         col++;
1422                 }
1423         }
1424
1425         g_string_chunk_free (lines_chunk);
1426         if (lines)
1427                 stf_parse_general_free (lines);
1428         if (result)
1429                 stf_read_remember_settings (sheet->workbook, parseoptions);
1430         return result;
1431 }
1432
1433 GnmCellRegion *
1434 stf_parse_region (StfParseOptions_t *parseoptions, char const *data, char const *data_end,
1435                   Workbook const *wb)
1436 {
1437         static GODateConventions const default_conv = {FALSE};
1438         GODateConventions const *date_conv = wb ? workbook_date_conv (wb) : &default_conv;
1439
1440         GnmCellRegion *cr;
1441         unsigned int row, colhigh = 0;
1442         GStringChunk *lines_chunk;
1443         GPtrArray *lines;
1444         size_t nformats;
1445
1446         SETUP_LOCALE_SWITCH;
1447
1448         g_return_val_if_fail (parseoptions != NULL, NULL);
1449         g_return_val_if_fail (data != NULL, NULL);
1450
1451         START_LOCALE_SWITCH;
1452
1453         cr = gnm_cell_region_new (NULL);
1454
1455         if (!data_end)
1456                 data_end = data + strlen (data);
1457         lines_chunk = g_string_chunk_new (100 * 1024);
1458         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1459         nformats = parseoptions->formats->len;
1460         for (row = 0; row < lines->len; row++) {
1461                 GPtrArray *line = g_ptr_array_index (lines, row);
1462                 unsigned int col, targetcol = 0;
1463                 for (col = 0; col < line->len; col++) {
1464                         if (parseoptions->col_import_array == NULL ||
1465                             parseoptions->col_import_array_len <= col ||
1466                             parseoptions->col_import_array[col]) {
1467                                 const char *text = g_ptr_array_index (line, col);
1468                                 if (text) {
1469                                         GOFormat *fmt = NULL;
1470                                         GnmValue *v;
1471                                         GnmCellCopy *cc;
1472
1473                                         if (col < nformats)
1474                                                 fmt = g_ptr_array_index (parseoptions->formats, col);
1475                                         v = format_match (text, fmt, date_conv);
1476                                         if (!v)
1477                                                 v = value_new_string (text);
1478
1479                                         cc = gnm_cell_copy_new (cr, targetcol, row);
1480                                         cc->val  = v;
1481                                         cc->texpr = NULL;
1482                                         targetcol++;
1483                                         if (targetcol > colhigh)
1484                                                 colhigh = targetcol;
1485                                 }
1486                         }
1487                 }
1488         }
1489         stf_parse_general_free (lines);
1490         g_string_chunk_free (lines_chunk);
1491
1492         END_LOCALE_SWITCH;
1493
1494         cr->cols    = (colhigh > 0) ? colhigh : 1;
1495         cr->rows    = row;
1496
1497         return cr;
1498 }
1499
/*
 * qsort comparison callback for arrays of int, ascending order.
 * Returns a negative, zero or positive value as *a is less than, equal to
 * or greater than *b.  The comparison is done without subtracting the
 * operands, so it cannot overflow for values that are far apart.
 */
static int
int_sort (void const *a, void const *b)
{
        int const lhs = *(int const *)a;
        int const rhs = *(int const *)b;

        return (lhs > rhs) - (lhs < rhs);
}
1505
1506 static int
1507 count_character (GPtrArray *lines, gunichar c, double quantile)
1508 {
1509         int *counts, res;
1510         unsigned int lno, cno;
1511
1512         if (lines->len == 0)
1513                 return 0;
1514
1515         counts = g_new (int, lines->len);
1516         for (lno = cno = 0; lno < lines->len; lno++) {
1517                 int count = 0;
1518                 GPtrArray *boxline = g_ptr_array_index (lines, lno);
1519                 char const *line = g_ptr_array_index (boxline, 0);
1520
1521                 /* Ignore empty lines.  */
1522                 if (*line == 0)
1523                         continue;
1524
1525                 while (*line) {
1526                         if (g_utf8_get_char (line) == c)
1527                                 count++;
1528                         line = g_utf8_next_char (line);
1529                 }
1530
1531                 counts[cno++] = count;
1532         }
1533
1534         if (cno == 0)
1535                 res = 0;
1536         else {
1537                 unsigned int qi = (unsigned int)ceil (quantile * cno);
1538                 qsort (counts, cno, sizeof (counts[0]), int_sort);
1539                 if (qi == cno)
1540                         qi--;
1541                 res = counts[qi];
1542         }
1543
1544         g_free (counts);
1545
1546         return res;
1547 }
1548
1549 static void
1550 dump_guessed_options (const StfParseOptions_t *res)
1551 {
1552         GSList *l;
1553         char ubuffer[6 + 1];
1554         unsigned ui;
1555
1556         g_printerr ("Guessed format:\n");
1557         switch (res->parsetype) {
1558         case PARSE_TYPE_CSV:
1559                 g_printerr ("  type = sep\n");
1560                 g_printerr ("  separator = %s\n",
1561                             res->sep.chr ? res->sep.chr : "(none)");
1562                 g_printerr ("    see two as one = %s\n",
1563                             res->sep.duplicates ? "yes" : "no");
1564                 break;
1565         case PARSE_TYPE_FIXED:
1566                 g_printerr ("  type = sep\n");
1567                 break;
1568         default:
1569                 ;
1570         }
1571         g_printerr ("  trim space = %d\n", res->trim_spaces);
1572
1573         ubuffer[g_unichar_to_utf8 (res->stringindicator, ubuffer)] = 0;
1574         g_printerr ("  string indicator = %s\n", ubuffer);
1575         g_printerr ("    see two as one = %s\n",
1576                     res->indicator_2x_is_single ? "yes" : "no");
1577
1578         g_printerr ("  line terminators =");
1579         for (l = res->terminator; l; l = l->next) {
1580                 const char *t = l->data;
1581                 if (strcmp (t, "\n") == 0)
1582                         g_printerr (" unix");
1583                 else if (strcmp (t, "\r") == 0)
1584                         g_printerr (" mac");
1585                 else if (strcmp (t, "\r\n") == 0)
1586                         g_printerr (" dos");
1587                 else
1588                         g_printerr (" other");
1589         }
1590         g_printerr ("\n");
1591
1592         for (ui = 0; ui < res->formats->len; ui++) {
1593                 GOFormat const *fmt = g_ptr_array_index (res->formats, ui);
1594                 const GString *decimal = ui < res->formats_decimal->len
1595                         ? g_ptr_array_index (res->formats_decimal, ui)
1596                         : NULL;
1597                 const GString *thousand = ui < res->formats_thousand->len
1598                         ? g_ptr_array_index (res->formats_thousand, ui)
1599                         : NULL;
1600
1601                 g_printerr ("  fmt.%d = %s\n", ui, go_format_as_XL (fmt));
1602                 if (decimal)
1603                         g_printerr ("  fmt.%d.dec = %s\n", ui, decimal->str);
1604                 if (thousand)
1605                         g_printerr ("  fmt.%d.thou = %s\n", ui, thousand->str);
1606         }
1607 }
1608
1609 /**
1610  * stf_parse_options_guess:
1611  * @data: the input data.
1612  *
1613  * Returns: (transfer full): the guessed options.
1614  **/
1615 StfParseOptions_t *
1616 stf_parse_options_guess (char const *data)
1617 {
1618         StfParseOptions_t *res;
1619         GStringChunk *lines_chunk;
1620         GPtrArray *lines;
1621         int tabcount;
1622         int sepcount;
1623         gunichar sepchar = go_locale_get_arg_sep ();
1624
1625         g_return_val_if_fail (data != NULL, NULL);
1626
1627         res = stf_parse_options_new ();
1628         lines_chunk = g_string_chunk_new (100 * 1024);
1629         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1630
1631         tabcount = count_character (lines, '\t', 0.2);
1632         sepcount = count_character (lines, sepchar, 0.2);
1633
1634         /* At least one tab per line and enough to separate every
1635            would-be sepchars.  */
1636         if (tabcount >= 1 && tabcount >= sepcount - 1)
1637                 stf_parse_options_csv_set_separators (res, "\t", NULL);
1638         else {
1639                 gunichar c;
1640
1641                 /*
1642                  * Try a few more or less likely characters and pick the first
1643                  * one that occurs on at least half the lines.
1644                  *
1645                  * The order is mostly random, although ' ' and '!' which
1646                  * could very easily occur in text are put last.
1647                  */
1648                 if (count_character (lines, (c = sepchar), 0.5) > 0 ||
1649                     count_character (lines, (c = go_locale_get_col_sep ()), 0.5) > 0 ||
1650                     count_character (lines, (c = ':'), 0.5) > 0 ||
1651                     count_character (lines, (c = ','), 0.5) > 0 ||
1652                     count_character (lines, (c = ';'), 0.5) > 0 ||
1653                     count_character (lines, (c = '|'), 0.5) > 0 ||
1654                     count_character (lines, (c = '!'), 0.5) > 0 ||
1655                     count_character (lines, (c = ' '), 0.5) > 0) {
1656                         char sep[7];
1657                         sep[g_unichar_to_utf8 (c, sep)] = 0;
1658                         if (c == ' ')
1659                                 strcat (sep, "\t");
1660                         stf_parse_options_csv_set_separators (res, sep, NULL);
1661                 }
1662         }
1663
1664         // For now, always separated:
1665         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1666
1667         switch (res->parsetype) {
1668         case PARSE_TYPE_CSV: {
1669                 gboolean dups =
1670                         res->sep.chr &&
1671                         strchr (res->sep.chr, ' ') != NULL;
1672                 gboolean trim =
1673                         res->sep.chr &&
1674                         strchr (res->sep.chr, ' ') != NULL;
1675
1676                 stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1677                 stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1678                 stf_parse_options_csv_set_duplicates (res, dups);
1679                 stf_parse_options_csv_set_trim_seps (res, trim);
1680
1681                 stf_parse_options_csv_set_stringindicator (res, '"');
1682                 break;
1683         }
1684
1685         case PARSE_TYPE_FIXED:
1686                 break;
1687
1688         default:
1689                 g_assert_not_reached ();
1690         }
1691
1692         stf_parse_general_free (lines);
1693         g_string_chunk_free (lines_chunk);
1694
1695         stf_parse_options_guess_formats (res, data);
1696
1697         if (gnm_debug_flag ("stf"))
1698                 dump_guessed_options (res);
1699
1700         return res;
1701 }
1702
1703 /**
1704  * stf_parse_options_guess_csv:
1705  * @data: the CSV input data.
1706  *
1707  * Returns: (transfer full): the guessed options.
1708  **/
1709 StfParseOptions_t *
1710 stf_parse_options_guess_csv (char const *data)
1711 {
1712         StfParseOptions_t *res;
1713         GStringChunk *lines_chunk;
1714         GPtrArray *lines;
1715         char *sep = NULL;
1716         char const *quoteline = NULL;
1717         int pass;
1718         gunichar stringind = '"';
1719
1720         g_return_val_if_fail (data != NULL, NULL);
1721
1722         res = stf_parse_options_new ();
1723         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1724         stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1725         stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1726         stf_parse_options_csv_set_duplicates (res, FALSE);
1727         stf_parse_options_csv_set_trim_seps (res, FALSE);
1728         stf_parse_options_csv_set_stringindicator (res, stringind);
1729
1730         lines_chunk = g_string_chunk_new (100 * 1024);
1731         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1732
1733         /*
1734          * Find a line containing a quote; skip first line unless it is
1735          * the only one.  Prefer a line with the quote first.
1736          */
1737         for (pass = 1; !quoteline && pass <= 2; pass++) {
1738                 size_t lno;
1739                 for (lno = MIN (1, lines->len - 1);
1740                      !quoteline && lno < lines->len;
1741                      lno++) {
1742                         GPtrArray *boxline = g_ptr_array_index (lines, lno);
1743                         const char *line = g_ptr_array_index (boxline, 0);
1744                         switch (pass) {
1745                         case 1:
1746                                 if (g_utf8_get_char (line) == stringind)
1747                                         quoteline = line;
1748                                 break;
1749                         case 2:
1750                                 if (my_utf8_strchr (line, stringind))
1751                                         quoteline = line;
1752                                 break;
1753                         }
1754                 }
1755         }
1756
1757         if (quoteline) {
1758                 const char *p0 = my_utf8_strchr (quoteline, stringind);
1759                 const char *p = p0;
1760
1761                 do {
1762                         p = g_utf8_next_char (p);
1763                 } while (*p && g_utf8_get_char (p) != stringind);
1764                 if (*p) p = g_utf8_next_char (p);
1765                 while (*p && g_unichar_isspace (g_utf8_get_char (p)))
1766                         p = g_utf8_next_char (p);
1767                 if (*p) {
1768                         /* Use the character after the quote.  */
1769                         sep = g_strndup (p, g_utf8_next_char (p) - p);
1770                 } else {
1771                         /* Try to use character before the quote.  */
1772                         while (p0 > quoteline && !sep) {
1773                                 p = p0;
1774                                 p0 = g_utf8_prev_char (p0);
1775                                 if (!g_unichar_isspace (g_utf8_get_char (p0)))
1776                                         sep = g_strndup (p0, p - p0);
1777                         }
1778                 }
1779         }
1780
1781         if (!sep)
1782                 sep = g_strdup (",");
1783         stf_parse_options_csv_set_separators (res, sep, NULL);
1784         g_free (sep);
1785
1786         stf_parse_general_free (lines);
1787         g_string_chunk_free (lines_chunk);
1788
1789         stf_parse_options_guess_formats (res, data);
1790
1791         if (gnm_debug_flag ("stf"))
1792                 dump_guessed_options (res);
1793
1794         return res;
1795 }
1796
/*
 * Bit flags for the interpretations still considered possible for a
 * column while guessing formats.  The low bits cover date field orders
 * and the 0x10/0x20 bits cover the decimal separator of numbers.
 */
typedef enum {
	/* Date field orders. */
	STF_GUESS_DATE_DMY = 1 << 0,
	STF_GUESS_DATE_MDY = 1 << 1,
	STF_GUESS_DATE_YMD = 1 << 2,

	/* Numbers, by decimal separator. */
	STF_GUESS_NUMBER_DEC_POINT = 1 << 4,
	STF_GUESS_NUMBER_DEC_COMMA = 1 << 5,
	STF_GUESS_NUMBER_DEC_EITHER = (STF_GUESS_NUMBER_DEC_POINT |
				       STF_GUESS_NUMBER_DEC_COMMA),

	/* Every flag above. */
	STF_GUESS_ALL = (STF_GUESS_DATE_DMY |
			 STF_GUESS_DATE_MDY |
			 STF_GUESS_DATE_YMD |
			 STF_GUESS_NUMBER_DEC_EITHER)
} StfGuessFormats;
1808
1809 static void
1810 do_check_date (const char *data, StfGuessFormats flag,
1811                gboolean mbd, gboolean ybm,
1812                unsigned *possible,
1813                GODateConventions const *date_conv)
1814 {
1815         GnmValue *v;
1816         gboolean this_mbd, this_ybm;
1817         int imbd;
1818
1819         if (!(*possible & flag))
1820                 return;
1821
1822         v = format_match_datetime (data, date_conv, mbd, TRUE, FALSE);
1823         if (!v || !VALUE_FMT (v))
1824                 goto fail;
1825
1826         imbd = go_format_month_before_day (VALUE_FMT (v));
1827         this_mbd = (imbd >= 1);
1828         this_ybm = (imbd == 2);
1829         if (mbd != this_mbd || ybm != this_ybm)
1830                 goto fail;
1831
1832         goto done;
1833
1834 fail:
1835         *possible &= ~flag;
1836 done:
1837         value_release (v);
1838 }
1839
1840
1841 static void
1842 do_check_number (const char *data, StfGuessFormats flag,
1843                  const GString *dec, const GString *thousand, const GString *curr,
1844                  unsigned *possible, int *decimals)
1845 {
1846         GnmValue *v;
1847         GOFormatFamily family;
1848         const char *pthou;
1849
1850         if (!(*possible & flag))
1851                 return;
1852
1853         v = format_match_decimal_number_with_locale (data, &family, curr, thousand, dec);
1854         if (!v)
1855                 goto fail;
1856
1857         if (*decimals != -2) {
1858                 const char *pdec = strstr (data, dec->str);
1859                 int this_decimals = 0;
1860                 if (pdec) {
1861                         pdec += dec->len;
1862                         while (g_ascii_isdigit (*pdec)) {
1863                                 pdec++;
1864                                 this_decimals++;
1865                         }
1866                 }
1867                 if (*decimals == -1)
1868                         *decimals = this_decimals;
1869                 else if (*decimals != this_decimals)
1870                         *decimals = -2;
1871         }
1872
1873         pthou = strstr (data, thousand->str);
1874         if (pthou) {
1875                 const char *p;
1876                 int digits = 0, nonzero_digits = 0;
1877                 for (p = data; p < pthou; p = g_utf8_next_char (p)) {
1878                         if (g_unichar_isdigit (g_utf8_get_char (p))) {
1879                                 digits++;
1880                                 if (*p != '0')
1881                                         nonzero_digits++;
1882                         }
1883                 }
1884                 // "-.222" implies that "." is not a thousands separator.
1885                 // "0.222" implies that "." is not a thousands separator.
1886                 // "12345,555" implies that "," is not a thousands separator.
1887                 if (nonzero_digits == 0 || digits > 3)
1888                         goto fail;
1889         }
1890
1891         goto done;
1892
1893 fail:
1894         *possible &= ~flag;
1895 done:
1896         value_release (v);
1897 }
1898
1899
1900 /**
1901  * stf_parse_options_guess_formats:
1902  * @data: the CSV input data.
1903  *
1904  * This function attempts to recognize data formats on a column-by-column
1905  * basis under the assumption that the data in a text file will generally
1906  * use the same data formats.
1907  *
1908  * This is useful because not all values give sufficient information by
1909  * themselves to tell what format the data is in.  For example, "1/2/2000"
1910  * is likely to be a date in year 2000, but it is not clear if it is in
1911  * January or February.  If another value in the same column is "31/1/1999"
1912  * then it is likely that the former date was in February.
1913  *
1914  * Likewise, a value of "123,456" could mean either 1.23456e5 or 1.23456e2.
1915  * A later value of "111,200.22" would clear up the confusion.
1916  *
1917  **/
1918 void
1919 stf_parse_options_guess_formats (StfParseOptions_t *po, char const *data)
1920 {
1921         GStringChunk *lines_chunk;
1922         GPtrArray *lines;
1923         unsigned lno, col, colcount, sline;
1924         GODateConventions const *date_conv = go_date_conv_from_str ("Lotus:1900");
1925         GString *s_comma = g_string_new (",");
1926         GString *s_dot = g_string_new (".");
1927         GString *s_dollar = g_string_new ("$");
1928         gboolean debug = gnm_debug_flag ("stf");
1929
1930         g_ptr_array_set_size (po->formats, 0);
1931         g_ptr_array_set_size (po->formats_decimal, 0);
1932         g_ptr_array_set_size (po->formats_thousand, 0);
1933         g_ptr_array_set_size (po->formats_curr, 0);
1934
1935         lines_chunk = g_string_chunk_new (100 * 1024);
1936         lines = stf_parse_general (po, lines_chunk, data, data + strlen (data));
1937
1938         colcount = 0;
1939         for (lno = 0; lno < lines->len; lno++) {
1940                 GPtrArray *line = g_ptr_array_index (lines, lno);
1941                 colcount = MAX (colcount, line->len);
1942         }
1943
1944         // Ignore first line unless it is the only one
1945         sline = MIN ((int)lines->len - 1, 1);
1946
1947         g_ptr_array_set_size (po->formats, colcount);
1948         g_ptr_array_set_size (po->formats_decimal, colcount);
1949         g_ptr_array_set_size (po->formats_thousand, colcount);
1950         g_ptr_array_set_size (po->formats_curr, colcount);
1951         for (col = 0; col < colcount; col++) {
1952                 unsigned possible = STF_GUESS_ALL;
1953                 GOFormat *fmt = NULL;
1954                 gboolean seen_dot = FALSE;
1955                 gboolean seen_comma = FALSE;
1956                 int decimals_if_point = -1; // -1: unset; -2: inconsistent; >=0: count
1957                 int decimals_if_comma = -1; // -1: unset; -2: inconsistent; >=0: count
1958
1959                 for (lno = sline; possible && lno < lines->len; lno++) {
1960                         GPtrArray *line = g_ptr_array_index (lines, lno);
1961                         const char *data = col < line->len ? g_ptr_array_index (line, col) : "";
1962                         unsigned prev_possible = possible;
1963
1964                         if (*data == 0 || data[0] == '\'')
1965                                 continue;
1966
1967                         do_check_date (data, STF_GUESS_DATE_DMY, FALSE, FALSE, &possible, date_conv);
1968                         do_check_date (data, STF_GUESS_DATE_MDY, TRUE, FALSE, &possible, date_conv);
1969                         do_check_date (data, STF_GUESS_DATE_YMD, TRUE, TRUE, &possible, date_conv);
1970
1971                         if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER) {
1972                                 const char *pdot = strstr (data, s_dot->str);
1973                                 const char *pcomma = strstr (data, s_comma->str);
1974                                 if (pdot && pcomma) {
1975                                         // Both -- last one is the decimal separator
1976                                         if (pdot > pcomma)
1977                                                 possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1978                                         else
1979                                                 possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1980                                 } else if (pdot && strstr (pdot + s_dot->len, s_dot->str)) {
1981                                         // Two dots so they are thousands separators
1982                                         possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1983                                 } else if (pcomma && strstr (pcomma + s_comma->len, s_comma->str)) {
1984                                         // Two commas so they are thousands separators
1985                                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1986                                 }
1987
1988                                 seen_dot = seen_dot || (pdot != 0);
1989                                 seen_comma = seen_comma || (pcomma != 0);
1990                         }
1991                         do_check_number (data, STF_GUESS_NUMBER_DEC_POINT,
1992                                          s_dot, s_comma, s_dollar,
1993                                          &possible, &decimals_if_point);
1994                         do_check_number (data, STF_GUESS_NUMBER_DEC_COMMA,
1995                                          s_comma, s_dot, s_dollar,
1996                                          &possible, &decimals_if_comma);
1997
1998                         if (possible != prev_possible && debug)
1999                                 g_printerr ("col=%d; after [%s] possible=0x%x\n", col, data, possible);
2000                 }
2001
2002                 if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER &&
2003                     !seen_dot && !seen_comma) {
2004                         // It doesn't matter what the separators are
2005                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
2006                 }
2007
2008                 switch (possible) {
2009                 case STF_GUESS_DATE_DMY:
2010                         fmt = go_format_new_from_XL ("d-mmm-yyyy");
2011                         break;
2012                 case STF_GUESS_DATE_MDY:
2013                         fmt = go_format_new_from_XL ("m/d/yyyy");
2014                         break;
2015                 case STF_GUESS_DATE_YMD:
2016                         fmt = go_format_new_from_XL ("yyyy-mm-dd");
2017                         break;
2018                 case STF_GUESS_NUMBER_DEC_POINT:
2019                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (".");
2020                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (",");
2021                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2022                         if (decimals_if_point > 0) {
2023                                 // Don't set format if decimals is zero
2024                                 GString *fmt_str = g_string_new (NULL);
2025                                 go_format_generate_number_str (fmt_str, 1, decimals_if_point, seen_comma, FALSE, FALSE, "", "");
2026                                 fmt = go_format_new_from_XL (fmt_str->str);
2027                                 g_string_free (fmt_str, TRUE);
2028                         }
2029                         break;
2030                 case STF_GUESS_NUMBER_DEC_COMMA:
2031                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (",");
2032                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (".");
2033                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2034                         if (decimals_if_comma > 0) {
2035                                 // Don't set format if decimals is zero
2036                                 GString *fmt_str = g_string_new (NULL);
2037                                 go_format_generate_number_str (fmt_str, 1, decimals_if_comma, seen_dot, FALSE, FALSE, "", "");
2038                                 fmt = go_format_new_from_XL (fmt_str->str);
2039                                 g_string_free (fmt_str, TRUE);
2040                         }
2041                         break;
2042                 default:
2043                         break;
2044                 }
2045
2046                 if (!fmt)
2047                         fmt = go_format_ref (go_format_general ());
2048                 g_ptr_array_index (po->formats, col) = fmt;
2049         }
2050
2051         stf_parse_general_free (lines);
2052         g_string_chunk_free (lines_chunk);
2053
2054         g_string_free (s_dot, TRUE);
2055         g_string_free (s_comma, TRUE);
2056         g_string_free (s_dollar, TRUE);
2057 }