src/stf-parse.c

   1 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*
   3  * stf-parse.c : Structured Text Format parser. (STF)
   4  *               A general purpose engine for parsing data
   5  *               in CSV and Fixed width format.
   6  *
   7  *
   8  * Copyright (C) Almer. S. Tigelaar.
   9  * EMail: almer1@dds.nl or almer-t@bigfoot.com
  10  *
  11  * Copyright (C) 2003 Andreas J. Guelzow <aguelzow@taliesin.ca>
  12  * Copyright (C) 2003,2008-2009 Morten Welinder <terra@gnome.org>
  13  *
  14  * This program is free software; you can redistribute it and/or modify
  15  * it under the terms of the GNU General Public License as published by
  16  * the Free Software Foundation; either version 2 of the License, or
  17  * (at your option) any later version.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  * GNU General Public License for more details.
  23  *
  24  * You should have received a copy of the GNU General Public License
  25  * along with this program; if not, see <https://www.gnu.org/licenses/>.
  26  */
  27
  28 #include <gnumeric-config.h>
  29 #include <glib/gi18n-lib.h>
  30 #include "gnumeric.h"
  31 #include "stf-parse.h"
  32 #include "stf-export.h"
  33
  34 #include "workbook.h"
  35 #include "cell.h"
  36 #include "sheet.h"
  37 #include "expr.h"
  38 #include "clipboard.h"
  39 #include "sheet-style.h"
  40 #include "value.h"
  41 #include "mstyle.h"
  42 #include "number-match.h"
  43 #include "gutils.h"
  44 #include "parse-util.h"
  45 #include "number-match.h"
  46 #include "gnm-format.h"
  47 #include "ranges.h"
  48 #include <goffice/goffice.h>
  49
  50 #include <stdlib.h>
  51 #include <locale.h>
  52 #include <string.h>
  53
  54 #define SETUP_LOCALE_SWITCH char *oldlocale = NULL
  55
  56 #define START_LOCALE_SWITCH if (parseoptions->locale) {\
  57 oldlocale = g_strdup(go_setlocale (LC_ALL, NULL)); \
  58 go_setlocale(LC_ALL, parseoptions->locale);}
  59
  60 #define END_LOCALE_SWITCH if (oldlocale) {\
  61 go_setlocale(LC_ALL, oldlocale);\
  62 g_free (oldlocale);}
  63
  64 /* Source_t struct, used for interchanging parsing information between the low level parse functions */
  65 typedef struct {
  66         GStringChunk *chunk;
  67         char const *position;  /* Indicates the current position within data */
  68
  69         /* Used internally for fixed width parsing */
  70         int splitpos;          /* Indicates current position in splitpositions array */
  71         int linepos;           /* Position on the current line */
  72 } Source_t;
  73
  74 /* Struct used for autodiscovery */
  75 typedef struct {
  76         int start;
  77         int stop;
  78 } AutoDiscovery_t;
  79
  80 /*
  81  * Some silly dude make the length field an unsigned int.  C just does
  82  * not deal very well with that.
  83  */
  84 static inline int
  85 my_garray_len (GArray const *a)
  86 {
  87         return (int)a->len;
  88 }
  89
  90 static char *
  91 my_utf8_strchr (const char *p, gunichar uc)
  92 {
  93         return uc < 0x7f ? strchr (p, uc) : g_utf8_strchr (p, -1, uc);
  94 }
  95
  96 static int
  97 compare_terminator (char const *s, StfParseOptions_t *parseoptions)
  98 {
  99         guchar const *us = (guchar const *)s;
 100         GSList *l;
 101
 102         if (*us > parseoptions->compiled_terminator.max ||
 103             *us < parseoptions->compiled_terminator.min)
 104                 return 0;
 105
 106         for (l = parseoptions->terminator; l; l = l->next) {
 107                 char const *term = l->data;
 108                 char const *d = s;
 109
 110                 while (*term) {
 111                         if (*d != *term)
 112                                 goto next;
 113                         term++;
 114                         d++;
 115                 }
 116                 return d - s;
 117
 118         next:
 119                 ;
 120         }
 121         return 0;
 122 }
 123
 124
 125 /*******************************************************************************************************
 126  * STF PARSE OPTIONS : StfParseOptions related
 127  *******************************************************************************************************/
 128
 129 static void
 130 gnm_g_string_free (GString *s)
 131 {
 132         if (s) g_string_free (s, TRUE);
 133 }
 134
 135
 136 /**
 137  * stf_parse_options_new:
 138  *
 139  * This will return a new StfParseOptions_t struct.
 140  * The struct should, after being used, freed with stf_parse_options_free.
 141  **/
 142 static StfParseOptions_t *
 143 stf_parse_options_new (void)
 144 {
 145         StfParseOptions_t* parseoptions = g_new0 (StfParseOptions_t, 1);
 146
 147         parseoptions->parsetype   = PARSE_TYPE_NOTSET;
 148
 149         parseoptions->terminator  = NULL;
 150         stf_parse_options_add_line_terminator (parseoptions, "\r\n");
 151         stf_parse_options_add_line_terminator (parseoptions, "\n");
 152         stf_parse_options_add_line_terminator (parseoptions, "\r");
 153
 154         parseoptions->trim_spaces = (TRIM_TYPE_RIGHT | TRIM_TYPE_LEFT);
 155         parseoptions->locale = NULL;
 156
 157         parseoptions->splitpositions = NULL;
 158         stf_parse_options_fixed_splitpositions_clear (parseoptions);
 159
 160         parseoptions->stringindicator = '"';
 161         parseoptions->indicator_2x_is_single = TRUE;
 162         parseoptions->sep.duplicates = FALSE;
 163         parseoptions->trim_seps = FALSE;
 164
 165         parseoptions->sep.str = NULL;
 166         parseoptions->sep.chr = NULL;
 167
 168         parseoptions->col_autofit_array = NULL;
 169         parseoptions->col_import_array = NULL;
 170         parseoptions->col_import_array_len = 0;
 171         parseoptions->formats = g_ptr_array_new_with_free_func ((GDestroyNotify)go_format_unref);
 172         parseoptions->formats_decimal = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 173         parseoptions->formats_thousand = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 174         parseoptions->formats_curr = g_ptr_array_new_with_free_func ((GDestroyNotify)gnm_g_string_free);
 175
 176         parseoptions->cols_exceeded = FALSE;
 177         parseoptions->rows_exceeded = FALSE;
 178         parseoptions->ref_count = 1;
 179
 180         return parseoptions;
 181 }
 182
 183 /**
 184  * stf_parse_options_free:
 185  *
 186  * will free @parseoptions, note that this will not free the splitpositions
 187  * member (GArray) of the struct, the caller is responsible for that.
 188  **/
 189 void
 190 stf_parse_options_free (StfParseOptions_t *parseoptions)
 191 {
 192         g_return_if_fail (parseoptions != NULL);
 193
 194         if (parseoptions->ref_count-- > 1)
 195                 return;
 196
 197         g_free (parseoptions->col_import_array);
 198         g_free (parseoptions->col_autofit_array);
 199         g_free (parseoptions->locale);
 200         g_free (parseoptions->sep.chr);
 201
 202         if (parseoptions->sep.str) {
 203                 GSList *l;
 204
 205                 for (l = parseoptions->sep.str; l != NULL; l = l->next)
 206                         g_free ((char *) l->data);
 207                 g_slist_free (parseoptions->sep.str);
 208         }
 209
 210         g_array_free (parseoptions->splitpositions, TRUE);
 211
 212         stf_parse_options_clear_line_terminator (parseoptions);
 213
 214         g_ptr_array_free (parseoptions->formats, TRUE);
 215         g_ptr_array_free (parseoptions->formats_decimal, TRUE);
 216         g_ptr_array_free (parseoptions->formats_thousand, TRUE);
 217         g_ptr_array_free (parseoptions->formats_curr, TRUE);
 218
 219         g_free (parseoptions);
 220 }
 221
 222 static StfParseOptions_t *
 223 stf_parse_options_ref (StfParseOptions_t *parseoptions)
 224 {
 225         parseoptions->ref_count++;
 226         return parseoptions;
 227 }
 228
 229 GType
 230 stf_parse_options_get_type (void)
 231 {
 232         static GType t = 0;
 233
 234         if (t == 0) {
 235                 t = g_boxed_type_register_static ("StfParseOptions_t",
 236                          (GBoxedCopyFunc)stf_parse_options_ref,
 237                          (GBoxedFreeFunc)stf_parse_options_free);
 238         }
 239         return t;
 240 }
 241
 242 void
 243 stf_parse_options_set_type (StfParseOptions_t *parseoptions, StfParseType_t const parsetype)
 244 {
 245         g_return_if_fail (parseoptions != NULL);
 246         g_return_if_fail (parsetype == PARSE_TYPE_CSV || parsetype == PARSE_TYPE_FIXED);
 247
 248         parseoptions->parsetype = parsetype;
 249 }
 250
 251 static gint
 252 long_string_first (gchar const *a, gchar const *b)
 253 {
 254         /* This actually is UTF-8 safe.  */
 255         return strlen (b) - strlen (a);
 256 }
 257
 258 static void
 259 compile_terminators (StfParseOptions_t *parseoptions)
 260 {
 261         GSList *l;
 262         GO_SLIST_SORT (parseoptions->terminator, (GCompareFunc)long_string_first);
 263
 264         parseoptions->compiled_terminator.min = 255;
 265         parseoptions->compiled_terminator.max = 0;
 266         for (l = parseoptions->terminator; l; l = l->next) {
 267                 const guchar *term = l->data;
 268                 parseoptions->compiled_terminator.min =
 269                         MIN (parseoptions->compiled_terminator.min, *term);
 270                 parseoptions->compiled_terminator.max =
 271                         MAX (parseoptions->compiled_terminator.max, *term);
 272         }
 273 }
 274
 275 /**
 276  * stf_parse_options_add_line_terminator:
 277  *
 278  * This will add to the line terminators, in both the Fixed width and CSV delimited importers
 279  * this indicates the end of a row.
 280  *
 281  **/
 282 void
 283 stf_parse_options_add_line_terminator (StfParseOptions_t *parseoptions, char const *terminator)
 284 {
 285         g_return_if_fail (parseoptions != NULL);
 286         g_return_if_fail (terminator != NULL && *terminator != 0);
 287
 288         GO_SLIST_PREPEND (parseoptions->terminator, g_strdup (terminator));
 289         compile_terminators (parseoptions);
 290 }
 291
 292 /**
 293  * stf_parse_options_clear_line_terminator:
 294  *
 295  * This will clear the line terminator, in both the Fixed width and CSV delimited importers
 296  * this indicates the end of a row.
 297  *
 298  **/
 299 void
 300 stf_parse_options_clear_line_terminator (StfParseOptions_t *parseoptions)
 301 {
 302         g_return_if_fail (parseoptions != NULL);
 303
 304         g_slist_free_full (parseoptions->terminator, g_free);
 305         parseoptions->terminator = NULL;
 306         compile_terminators (parseoptions);
 307 }
 308
 309 /**
 310  * stf_parse_options_set_trim_spaces:
 311  *
 312  * If enabled will trim spaces in every parsed field on left and/or right
 313  * sides.
 314  **/
 315 void
 316 stf_parse_options_set_trim_spaces (StfParseOptions_t *parseoptions, StfTrimType_t const trim_spaces)
 317 {
 318         g_return_if_fail (parseoptions != NULL);
 319
 320         parseoptions->trim_spaces = trim_spaces;
 321 }
 322
 323 /**
 324  * stf_parse_options_csv_set_separators:
 325  * @parseoptions: #StfParseOptions_t
 326  * @character:
 327  * @string: (element-type char):
 328  *
 329  * A copy is made of the parameters.
 330  **/
 331 void
 332 stf_parse_options_csv_set_separators (StfParseOptions_t *parseoptions, char const *character,
 333                                       GSList const *string)
 334 {
 335         g_return_if_fail (parseoptions != NULL);
 336
 337         g_free (parseoptions->sep.chr);
 338         parseoptions->sep.chr = g_strdup (character);
 339
 340         g_slist_free_full (parseoptions->sep.str, g_free);
 341         parseoptions->sep.str = go_slist_map (string, (GOMapFunc)g_strdup);
 342 }
 343
 344 void
 345 stf_parse_options_csv_set_stringindicator (StfParseOptions_t *parseoptions, gunichar const stringindicator)
 346 {
 347         g_return_if_fail (parseoptions != NULL);
 348
 349         parseoptions->stringindicator = stringindicator;
 350 }
 351
 352 /**
 353  * stf_parse_options_csv_set_indicator_2x_is_single:
 354  * @indic_2x: a boolean value indicating whether we want to see two
 355  *              adjacent string indicators as a single string indicator
 356  *              that is part of the cell, rather than a terminator.
 357  **/
 358 void
 359 stf_parse_options_csv_set_indicator_2x_is_single (StfParseOptions_t *parseoptions,
 360                                                   gboolean const indic_2x)
 361 {
 362         g_return_if_fail (parseoptions != NULL);
 363
 364         parseoptions->indicator_2x_is_single = indic_2x;
 365 }
 366
 367 /**
 368  * stf_parse_options_csv_set_duplicates:
 369  * @parseoptions:
 370  * @duplicates: a boolean value indicating whether we want to see two
 371  *               separators right behind each other as one
 372  **/
 373 void
 374 stf_parse_options_csv_set_duplicates (StfParseOptions_t *parseoptions, gboolean const duplicates)
 375 {
 376         g_return_if_fail (parseoptions != NULL);
 377
 378         parseoptions->sep.duplicates = duplicates;
 379 }
 380
 381 /**
 382  * stf_parse_options_csv_set_trim_seps:
 383  * @trim_seps: a boolean value indicating whether we want to ignore
 384  *               separators at the beginning of lines
 385  **/
 386 void
 387 stf_parse_options_csv_set_trim_seps (StfParseOptions_t *parseoptions, gboolean const trim_seps)
 388 {
 389         g_return_if_fail (parseoptions != NULL);
 390
 391         parseoptions->trim_seps = trim_seps;
 392 }
 393
 394 /**
 395  * stf_parse_options_fixed_splitpositions_clear:
 396  *
 397  * This will clear the splitpositions (== points on which a line is split)
 398  **/
 399 void
 400 stf_parse_options_fixed_splitpositions_clear (StfParseOptions_t *parseoptions)
 401 {
 402         int minus_one = -1;
 403         g_return_if_fail (parseoptions != NULL);
 404
 405         if (parseoptions->splitpositions)
 406                 g_array_free (parseoptions->splitpositions, TRUE);
 407         parseoptions->splitpositions = g_array_new (FALSE, FALSE, sizeof (int));
 408
 409         g_array_append_val (parseoptions->splitpositions, minus_one);
 410 }
 411
 412 /**
 413  * stf_parse_options_fixed_splitpositions_add:
 414  *
 415  * @position will be added to the splitpositions.
 416  **/
 417 void
 418 stf_parse_options_fixed_splitpositions_add (StfParseOptions_t *parseoptions, int position)
 419 {
 420         unsigned int ui;
 421
 422         g_return_if_fail (parseoptions != NULL);
 423         g_return_if_fail (position >= 0);
 424
 425         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 426                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 427                 if (position == here)
 428                         return;
 429                 if (position < here)
 430                         break;
 431         }
 432
 433         g_array_insert_val (parseoptions->splitpositions, ui, position);
 434 }
 435
 436 void
 437 stf_parse_options_fixed_splitpositions_remove (StfParseOptions_t *parseoptions, int position)
 438 {
 439         unsigned int ui;
 440
 441         g_return_if_fail (parseoptions != NULL);
 442         g_return_if_fail (position >= 0);
 443
 444         for (ui = 0; ui < parseoptions->splitpositions->len - 1; ui++) {
 445                 int here = g_array_index (parseoptions->splitpositions, int, ui);
 446                 if (position == here)
 447                         g_array_remove_index (parseoptions->splitpositions, ui);
 448                 if (position <= here)
 449                         return;
 450         }
 451 }
 452
 453 int
 454 stf_parse_options_fixed_splitpositions_count (StfParseOptions_t *parseoptions)
 455 {
 456         return parseoptions->splitpositions->len;
 457 }
 458
 459 int
 460 stf_parse_options_fixed_splitpositions_nth (StfParseOptions_t *parseoptions, int n)
 461 {
 462         return g_array_index (parseoptions->splitpositions, int, n);
 463 }
 464
 465
 466 /**
 467  * stf_parse_options_valid:
 468  * @parseoptions: an import options struct
 469  *
 470  * Checks if @parseoptions is correctly filled
 471  *
 472  * returns : TRUE if it is correctly filled, FALSE otherwise.
 473  **/
 474 static gboolean
 475 stf_parse_options_valid (StfParseOptions_t *parseoptions)
 476 {
 477         g_return_val_if_fail (parseoptions != NULL, FALSE);
 478
 479         if (parseoptions->parsetype == PARSE_TYPE_FIXED) {
 480                 if (!parseoptions->splitpositions) {
 481                         g_warning ("STF: No splitpositions in struct");
 482                         return FALSE;
 483                 }
 484         }
 485
 486         return TRUE;
 487 }
 488
 489 /*******************************************************************************************************
 490  * STF PARSE : The actual routines that do the 'trick'
 491  *******************************************************************************************************/
 492
 493 static void
 494 trim_spaces_inplace (char *field, StfParseOptions_t const *parseoptions)
 495 {
 496         if (!field) return;
 497
 498         if (parseoptions->trim_spaces & TRIM_TYPE_LEFT) {
 499                 char *s = field;
 500
 501                 while (g_unichar_isspace (g_utf8_get_char (s)))
 502                         s = g_utf8_next_char (s);
 503
 504                 if (s != field)
 505                         memmove (field, s, 1 + strlen (s));
 506         }
 507
 508         if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 509                 char *s = field + strlen (field);
 510
 511                 while (field != s) {
 512                         s = g_utf8_prev_char (s);
 513                         if (!g_unichar_isspace (g_utf8_get_char (s)))
 514                                 break;
 515                         *s = 0;
 516                 }
 517         }
 518 }
 519
 520 /**
 521  * stf_parse_csv_is_separator:
 522  *
 523  * returns NULL if @character is not a separator, a pointer to the character
 524  * after the separator otherwise.
 525  **/
 526 static char const *
 527 stf_parse_csv_is_separator (char const *character, char const *chr, GSList const *str)
 528 {
 529         g_return_val_if_fail (character != NULL, NULL);
 530
 531         if (*character == 0)
 532                 return NULL;
 533
 534         if (str) {
 535                 GSList const *l;
 536
 537                 for (l = str; l != NULL; l = l->next) {
 538                         char const *s = l->data;
 539                         char const *r;
 540                         glong cnt;
 541                         glong const len = g_utf8_strlen (s, -1);
 542
 543                         /* Don't compare past the end of the buffer! */
 544                         for (r = character, cnt = 0; cnt < len; cnt++, r = g_utf8_next_char (r))
 545                                 if (*r == '\0')
 546                                         break;
 547
 548                         if ((cnt == len) && (memcmp (character, s, len) == 0))
 549                                 return g_utf8_offset_to_pointer (character, len);
 550                 }
 551         }
 552
 553         if (chr && my_utf8_strchr (chr, g_utf8_get_char (character)))
 554                 return g_utf8_next_char(character);
 555
 556         return NULL;
 557 }
 558
 559 /*
 560  * stf_parse_eat_separators:
 561  *
 562  * skip over leading separators
 563  *
 564  */
 565
 566 static void
 567 stf_parse_eat_separators (Source_t *src, StfParseOptions_t *parseoptions)
 568 {
 569         char const *cur, *next;
 570
 571         g_return_if_fail (src != NULL);
 572         g_return_if_fail (parseoptions != NULL);
 573
 574         cur = src->position;
 575
 576         if (*cur == '\0' || compare_terminator (cur, parseoptions))
 577                 return;
 578         while ((next = stf_parse_csv_is_separator (cur, parseoptions->sep.chr, parseoptions->sep.str)))
 579                 cur = next;
 580         src->position = cur;
 581         return;
 582 }
 583
 584
 585 typedef enum {
 586         STF_CELL_ERROR,
 587         STF_CELL_EOF,
 588         STF_CELL_EOL,
 589         STF_CELL_FIELD_NO_SEP,
 590         STF_CELL_FIELD_SEP
 591 } StfParseCellRes;
 592
 593 static StfParseCellRes
 594 stf_parse_csv_cell (GString *text, Source_t *src, StfParseOptions_t *parseoptions)
 595 {
 596         char const *cur;
 597         gboolean saw_sep = FALSE;
 598
 599         g_return_val_if_fail (src != NULL, STF_CELL_ERROR);
 600         g_return_val_if_fail (parseoptions != NULL, STF_CELL_ERROR);
 601
 602         cur = src->position;
 603         g_return_val_if_fail (cur != NULL, STF_CELL_ERROR);
 604
 605         /* Skip whitespace, but stop at line terminators.  */
 606         while (1) {
 607                 int term_len;
 608
 609                 if (*cur == 0) {
 610                         src->position = cur;
 611                         return STF_CELL_EOF;
 612                 }
 613
 614                 term_len = compare_terminator (cur, parseoptions);
 615                 if (term_len) {
 616                         src->position = cur + term_len;
 617                         return STF_CELL_EOL;
 618                 }
 619
 620                 if ((parseoptions->trim_spaces & TRIM_TYPE_LEFT) == 0)
 621                         break;
 622
 623                 if (stf_parse_csv_is_separator (cur, parseoptions->sep.chr,
 624                                                 parseoptions->sep.str))
 625                         break;
 626
 627                 if (!g_unichar_isspace (g_utf8_get_char (cur)))
 628                         break;
 629                 cur = g_utf8_next_char (cur);
 630         }
 631
 632         if (parseoptions->stringindicator != 0 &&
 633             g_utf8_get_char (cur) == parseoptions->stringindicator) {
 634                 cur = g_utf8_next_char (cur);
 635                 while (*cur) {
 636                         gunichar uc = g_utf8_get_char (cur);
 637                         cur = g_utf8_next_char (cur);
 638
 639                         if (uc == parseoptions->stringindicator) {
 640                                 if (parseoptions->indicator_2x_is_single &&
 641                                     g_utf8_get_char (cur) == parseoptions->stringindicator)
 642                                         cur = g_utf8_next_char (cur);
 643                                 else {
 644                                         /* "field content"dropped-garbage,  */
 645                                         while (*cur && !compare_terminator (cur, parseoptions)) {
 646                                                 char const *post = stf_parse_csv_is_separator
 647                                                         (cur, parseoptions->sep.chr, parseoptions->sep.str);
 648                                                 if (post) {
 649                                                         cur = post;
 650                                                         saw_sep = TRUE;
 651                                                         break;
 652                                                 }
 653                                                 cur = g_utf8_next_char (cur);
 654                                         }
 655                                         break;
 656                                 }
 657                         }
 658
 659                         g_string_append_unichar (text, uc);
 660                 }
 661
 662                 /* We silently allow a missing terminating quote.  */
 663         } else {
 664                 /* Unquoted field.  */
 665
 666                 while (*cur && !compare_terminator (cur, parseoptions)) {
 667
 668                         char const *post = stf_parse_csv_is_separator
 669                                 (cur, parseoptions->sep.chr, parseoptions->sep.str);
 670                         if (post) {
 671                                 cur = post;
 672                                 saw_sep = TRUE;
 673                                 break;
 674                         }
 675
 676                         g_string_append_unichar (text, g_utf8_get_char (cur));
 677                         cur = g_utf8_next_char (cur);
 678                 }
 679
 680                 if (parseoptions->trim_spaces & TRIM_TYPE_RIGHT) {
 681                         while (text->len) {
 682                                 const char *last = g_utf8_prev_char (text->str + text->len);
 683                                 if (!g_unichar_isspace (g_utf8_get_char (last)))
 684                                         break;
 685                                 g_string_truncate (text, last - text->str);
 686                         }
 687                 }
 688         }
 689
 690         src->position = cur;
 691
 692         if (saw_sep && parseoptions->sep.duplicates)
 693                 stf_parse_eat_separators (src, parseoptions);
 694
 695         return saw_sep ? STF_CELL_FIELD_SEP : STF_CELL_FIELD_NO_SEP;
 696 }
 697
 698 /**
 699  * stf_parse_csv_line:
 700  *
 701  * This will parse one line from the current @src->position.
 702  * NOTE: The calling routine is responsible for freeing the result.
 703  *
 704  * returns : a GPtrArray of char*'s
 705  **/
 706 static GPtrArray *
 707 stf_parse_csv_line (Source_t *src, StfParseOptions_t *parseoptions)
 708 {
 709         GPtrArray *line;
 710         gboolean cont = FALSE;
 711         GString *text;
 712
 713         g_return_val_if_fail (src != NULL, NULL);
 714         g_return_val_if_fail (parseoptions != NULL, NULL);
 715
 716         line = g_ptr_array_new ();
 717         if (parseoptions->trim_seps)
 718                 stf_parse_eat_separators (src, parseoptions);
 719
 720         text = g_string_sized_new (30);
 721
 722         while (1) {
 723                 char *ctext;
 724                 StfParseCellRes res =
 725                         stf_parse_csv_cell (text, src, parseoptions);
 726                 trim_spaces_inplace (text->str, parseoptions);
 727                 ctext = g_string_chunk_insert_len (src->chunk,
 728                                                    text->str, text->len);
 729                 g_string_truncate (text, 0);
 730
 731                 switch (res) {
 732                 case STF_CELL_FIELD_NO_SEP:
 733                         g_ptr_array_add (line, ctext);
 734                         cont = FALSE;
 735                         break;
 736
 737                 case STF_CELL_FIELD_SEP:
 738                         g_ptr_array_add (line, ctext);
 739                         cont = TRUE;  /* Make sure we see one more field.  */
 740                         break;
 741
 742                 default:
 743                         if (cont)
 744                                 g_ptr_array_add (line, ctext);
 745                         g_string_free (text, TRUE);
 746                         return line;
 747                 }
 748         }
 749 }
 750
 751 /**
 752  * stf_parse_fixed_cell:
 753  *
 754  * returns a pointer to the parsed cell contents.
 755  **/
 756 static char *
 757 stf_parse_fixed_cell (Source_t *src, StfParseOptions_t *parseoptions)
 758 {
 759         char *res;
 760         char const *cur;
 761         int splitval;
 762
 763         g_return_val_if_fail (src != NULL, NULL);
 764         g_return_val_if_fail (parseoptions != NULL, NULL);
 765
 766         cur = src->position;
 767
 768         if (src->splitpos < my_garray_len (parseoptions->splitpositions))
 769                 splitval = (int) g_array_index (parseoptions->splitpositions, int, src->splitpos);
 770         else
 771                 splitval = -1;
 772
 773         while (*cur != 0 && !compare_terminator (cur, parseoptions) && splitval != src->linepos) {
 774                 src->linepos++;
 775                 cur = g_utf8_next_char (cur);
 776         }
 777
 778         res = g_string_chunk_insert_len (src->chunk,
 779                                          src->position,
 780                                          cur - src->position);
 781
 782         src->position = cur;
 783
 784         return res;
 785 }
 786
 787 /**
 788  * stf_parse_fixed_line:
 789  *
 790  * This will parse one line from the current @src->position.
 791  * It will return a GPtrArray with the cell contents as strings.
 792
 793  * NOTE: The calling routine is responsible for freeing result.
 794  **/
 795 static GPtrArray *
 796 stf_parse_fixed_line (Source_t *src, StfParseOptions_t *parseoptions)
 797 {
 798         GPtrArray *line;
 799
 800         g_return_val_if_fail (src != NULL, NULL);
 801         g_return_val_if_fail (parseoptions != NULL, NULL);
 802
 803         src->linepos = 0;
 804         src->splitpos = 0;
 805
 806         line = g_ptr_array_new ();
 807         while (*src->position != '\0' && !compare_terminator (src->position, parseoptions)) {
 808                 char *field = stf_parse_fixed_cell (src, parseoptions);
 809
 810                 trim_spaces_inplace (field, parseoptions);
 811                 g_ptr_array_add (line, field);
 812
 813                 src->splitpos++;
 814         }
 815
 816         while (line->len < parseoptions->splitpositions->len)
 817                 g_ptr_array_add (line, g_strdup (""));
 818
 819         return line;
 820 }
 821
 822 /**
 823  * stf_parse_general_free: (skip)
 824  */
 825 void
 826 stf_parse_general_free (GPtrArray *lines)
 827 {
 828         unsigned lineno;
 829         for (lineno = 0; lineno < lines->len; lineno++) {
 830                 GPtrArray *line = g_ptr_array_index (lines, lineno);
 831                 /* Fields are not freed here.  */
 832                 if (line)
 833                         g_ptr_array_free (line, TRUE);
 834         }
 835         g_ptr_array_free (lines, TRUE);
 836 }
 837
 838
 839 /**
 840  * stf_parse_general: (skip)
 841  *
 842  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 843  * GPtrArray of strings.
 844  *
 845  * The caller must free this entire structure, for example by calling
 846  * stf_parse_general_free.
 847  **/
 848 GPtrArray *
 849 stf_parse_general (StfParseOptions_t *parseoptions,
 850                    GStringChunk *lines_chunk,
 851                    char const *data, char const *data_end)
 852 {
 853         GPtrArray *lines;
 854         Source_t src;
 855         int row;
 856         char const *valid_end = data_end;
 857
 858         g_return_val_if_fail (parseoptions != NULL, NULL);
 859         g_return_val_if_fail (data != NULL, NULL);
 860         g_return_val_if_fail (data_end != NULL, NULL);
 861         g_return_val_if_fail (stf_parse_options_valid (parseoptions), NULL);
 862         g_return_val_if_fail (g_utf8_validate (data, data_end-data, &valid_end), NULL);
 863
 864         src.chunk = lines_chunk;
 865         src.position = data;
 866         row = 0;
 867
 868         if ((data_end-data >= 3) && !strncmp(src.position, "\xEF\xBB\xBF", 3)) {
 869                 /* Skip over byte-order mark */
 870                 src.position += 3;
 871         }
 872
 873         lines = g_ptr_array_new ();
 874         while (*src.position != '\0' && src.position < data_end) {
 875                 GPtrArray *line;
 876
 877                 if (row == GNM_MAX_ROWS) {
 878                         parseoptions->rows_exceeded = TRUE;
 879                         break;
 880                 }
 881
 882                 line = parseoptions->parsetype == PARSE_TYPE_CSV
 883                         ? stf_parse_csv_line (&src, parseoptions)
 884                         : stf_parse_fixed_line (&src, parseoptions);
 885
 886                 g_ptr_array_add (lines, line);
 887                 if (parseoptions->parsetype != PARSE_TYPE_CSV)
 888                         src.position += compare_terminator (src.position, parseoptions);
 889                 row++;
 890         }
 891
 892         return lines;
 893 }
 894
 895 /**
 896  * stf_parse_lines: (skip)
 897  * @parseoptions: #StfParseOptions_t
 898  * @lines_chunk:
 899  * @data:
 900  * @maxlines:
 901  * @with_lineno:
 902  *
 903  * Returns: (transfer full): a GPtrArray of lines, where each line is itself a
 904  * GPtrArray of strings.
 905  *
 906  * The caller must free this entire structure, for example by calling
 907  * stf_parse_general_free.
 908  **/
 909 GPtrArray *
 910 stf_parse_lines (StfParseOptions_t *parseoptions,
 911                  GStringChunk *lines_chunk,
 912                  char const *data,
 913                  int maxlines, gboolean with_lineno)
 914 {
 915         GPtrArray *lines;
 916         int lineno = 1;
 917
 918         g_return_val_if_fail (data != NULL, NULL);
 919
 920         lines = g_ptr_array_new ();
 921         while (*data) {
 922                 char const *data0 = data;
 923                 GPtrArray *line = g_ptr_array_new ();
 924
 925                 if (with_lineno) {
 926                         char buf[4 * sizeof (int)];
 927                         sprintf (buf, "%d", lineno);
 928                         g_ptr_array_add (line,
 929                                          g_string_chunk_insert (lines_chunk, buf));
 930                 }
 931
 932                 while (1) {
 933                         int termlen = compare_terminator (data, parseoptions);
 934                         if (termlen > 0 || *data == 0) {
 935                                 g_ptr_array_add (line,
 936                                                  g_string_chunk_insert_len (lines_chunk,
 937                                                                             data0,
 938                                                                             data - data0));
 939                                 data += termlen;
 940                                 break;
 941                         } else
 942                                 data = g_utf8_next_char (data);
 943                 }
 944
 945                 g_ptr_array_add (lines, line);
 946
 947                 lineno++;
 948                 if (lineno >= maxlines)
 949                         break;
 950         }
 951         return lines;
 952 }
 953
 954 char const *
 955 stf_parse_find_line (StfParseOptions_t *parseoptions,
 956                      char const *data,
 957                      int line)
 958 {
 959         while (line > 0) {
 960                 int termlen = compare_terminator (data, parseoptions);
 961                 if (termlen > 0) {
 962                         data += termlen;
 963                         line--;
 964                 } else if (*data == 0) {
 965                         return data;
 966                 } else {
 967                         data = g_utf8_next_char (data);
 968                 }
 969         }
 970         return data;
 971 }
 972
 973
 974 /**
 975  * stf_parse_options_fixed_autodiscover:
 976  * @parseoptions: a Parse options struct.
 977  * @data: The actual data.
 978  * @data_end: data end.
 979  *
 980  * Automatically try to discover columns in the text to be parsed.
 981  * We ignore empty lines (only containing parseoptions->terminator)
 982  *
 983  * FIXME: This is so extremely ugly that I am too tired to rewrite it right now.
 984  *        Think hard of a better more flexible solution...
 985  **/
 986 void
 987 stf_parse_options_fixed_autodiscover (StfParseOptions_t *parseoptions,
 988                                       char const *data, char const *data_end)
 989 {
 990         char const *iterator = data;
 991         GSList *list = NULL;
 992         GSList *list_start = NULL;
 993         int lines = 0;
 994         int effective_lines = 0;
 995         int max_line_length = 0;
 996         int *line_begin_hits = NULL;
 997         int *line_end_hits = NULL;
 998         int i;
 999
1000         stf_parse_options_fixed_splitpositions_clear (parseoptions);
1001
1002         /*
1003          * First take a look at all possible white space combinations
1004          */
1005         while (*iterator && iterator < data_end) {
1006                 gboolean begin_recorded = FALSE;
1007                 AutoDiscovery_t *disc = NULL;
1008                 int position = 0;
1009                 int termlen = 0;
1010
1011                 while (*iterator && (termlen = compare_terminator (iterator, parseoptions)) == 0) {
1012                         if (!begin_recorded && *iterator == ' ') {
1013                                 disc = g_new0 (AutoDiscovery_t, 1);
1014
1015                                 disc->start = position;
1016
1017                                 begin_recorded = TRUE;
1018                         } else if (begin_recorded && *iterator != ' ') {
1019                                 disc->stop = position;
1020                                 list = g_slist_prepend (list, disc);
1021
1022                                 begin_recorded = FALSE;
1023                                 disc = NULL;
1024                         }
1025
1026                         position++;
1027                         iterator++;
1028                 }
1029
1030                 if (position > max_line_length)
1031                         max_line_length = position;
1032
1033                 /*
1034                  * If there are excess spaces at the end of
1035                  * the line : ignore them
1036                  */
1037                 g_free (disc);
1038
1039                 /*
1040                  * Hop over the terminator
1041                  */
1042                 iterator += termlen;
1043
1044                 if (position != 0)
1045                         effective_lines++;
1046
1047                 lines++;
1048         }
1049
1050         list       = g_slist_reverse (list);
1051         list_start = list;
1052
1053         /*
1054          * Kewl stuff :
1055          * Look at the number of hits at each line position
1056          * if the number of hits equals the number of lines
1057          * we can be pretty sure this is the start or end
1058          * of a column, we filter out empty columns
1059          * later
1060          */
1061         line_begin_hits = g_new0 (int, max_line_length + 1);
1062         line_end_hits   = g_new0 (int, max_line_length + 1);
1063
1064         while (list) {
1065                 AutoDiscovery_t *disc = list->data;
1066
1067                 line_begin_hits[disc->start]++;
1068                 line_end_hits[disc->stop]++;
1069
1070                 g_free (disc);
1071
1072                 list = g_slist_next (list);
1073         }
1074         g_slist_free (list_start);
1075
1076         for (i = 0; i < max_line_length + 1; i++)
1077                 if (line_begin_hits[i] == effective_lines || line_end_hits[i] == effective_lines)
1078                         stf_parse_options_fixed_splitpositions_add (parseoptions, i);
1079
1080         /*
1081          * Do some corrections to the initial columns
1082          * detected here, we obviously don't need to
1083          * do this if there are no columns at all.
1084          */
1085         if (my_garray_len (parseoptions->splitpositions) > 0) {
1086                 /*
1087                  * Try to find columns that look like :
1088                  *
1089                  * Example     100
1090                  * Example2      9
1091                  *
1092                  * (In other words : Columns with left & right justification with
1093                  *  a minimum of 2 spaces in the middle)
1094                  * Split these columns in 2
1095                  */
1096
1097                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1098                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1099                         int end   = g_array_index (parseoptions->splitpositions, int, i + 1);
1100                         int num_spaces   = -1;
1101                         int spaces_start = 0;
1102                         gboolean right_aligned = TRUE;
1103                         gboolean left_aligned  = TRUE;
1104                         gboolean has_2_spaces  = TRUE;
1105
1106                         iterator = data;
1107                         lines = 0;
1108                         while (*iterator && iterator < data_end) {
1109                                 gboolean trigger = FALSE;
1110                                 gboolean space_trigger = FALSE;
1111                                 int pos = 0;
1112
1113                                 num_spaces   = -1;
1114                                 spaces_start = 0;
1115                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1116                                         if (pos == begin) {
1117                                                 if (*iterator == ' ')
1118                                                         left_aligned = FALSE;
1119
1120                                                 trigger = TRUE;
1121                                         } else if (pos == end - 1) {
1122                                                 if (*iterator == ' ')
1123                                                         right_aligned = FALSE;
1124
1125                                                 trigger = FALSE;
1126                                         }
1127
1128                                         if (trigger || pos == end - 1) {
1129                                                 if (!space_trigger && *iterator == ' ') {
1130                                                         space_trigger = TRUE;
1131                                                         spaces_start = pos;
1132                                                 } else if (space_trigger && *iterator != ' ') {
1133                                                         space_trigger = FALSE;
1134                                                         num_spaces = pos - spaces_start;
1135                                                 }
1136                                         }
1137
1138                                         iterator++;
1139                                         pos++;
1140                                 }
1141
1142                                 if (num_spaces < 2)
1143                                         has_2_spaces = FALSE;
1144
1145                                 if (*iterator)
1146                                         iterator++;
1147
1148                                 lines++;
1149                         }
1150
1151                         /*
1152                          * If this column meets all the criteria
1153                          * split it into two at the last measured
1154                          * spaces_start + num_spaces
1155                          */
1156                         if (has_2_spaces && right_aligned && left_aligned) {
1157                                 int val = (((spaces_start + num_spaces) - spaces_start) / 2) + spaces_start;
1158
1159                                 g_array_insert_val (parseoptions->splitpositions, i + 1, val);
1160
1161                                 /*
1162                                  * Skip over the inserted column
1163                                  */
1164                                 i++;
1165                         }
1166                 }
1167
1168                 /*
1169                  * Remove empty columns here if needed
1170                  */
1171                 for (i = 0; i < my_garray_len (parseoptions->splitpositions) - 1; i++) {
1172                         int begin = g_array_index (parseoptions->splitpositions, int, i);
1173                         int end = g_array_index (parseoptions->splitpositions, int, i + 1);
1174                         gboolean only_spaces = TRUE;
1175
1176                         iterator = data;
1177                         lines = 0;
1178                         while (*iterator && iterator < data_end) {
1179                                 gboolean trigger = FALSE;
1180                                 int pos = 0;
1181
1182                                 while (*iterator && !compare_terminator (iterator, parseoptions)) {
1183                                         if (pos == begin)
1184                                                 trigger = TRUE;
1185                                         else if (pos == end)
1186                                                 trigger = FALSE;
1187
1188                                         if (trigger) {
1189                                                 if (*iterator != ' ')
1190                                                         only_spaces = FALSE;
1191                                         }
1192
1193                                         iterator++;
1194                                         pos++;
1195                                 }
1196
1197                                 if (*iterator)
1198                                         iterator++;
1199
1200                                 lines++;
1201                         }
1202
1203                         /*
1204                          * The column only contains spaces
1205                          * remove it
1206                          */
1207                         if (only_spaces) {
1208                                 g_array_remove_index (parseoptions->splitpositions, i);
1209
1210                                 /*
1211                                  * We HAVE to make sure that the next column (end) also
1212                                  * gets checked out. If we don't decrease "i" here, we
1213                                  * will skip over it as the indexes shift down after
1214                                  * the removal
1215                                  */
1216                                 i--;
1217                         }
1218                 }
1219         }
1220
1221         g_free (line_begin_hits);
1222         g_free (line_end_hits);
1223 }
1224
1225 /*******************************************************************************************************
1226  * STF PARSE HL: high-level functions that dump the raw data returned by the low-level parsing
1227  *               functions into something meaningful (== application specific)
1228  *******************************************************************************************************/
1229
1230 /*
1231  * This is more or less as gnm_cell_set_text, except...
1232  * 1. Unknown names are not allowed.
1233  * 2. Only '=' can start an expression.
1234  */
1235
1236 static void
1237 stf_cell_set_text (GnmCell *cell, char const *text)
1238 {
1239         GnmExprTop const *texpr;
1240         GnmValue *val;
1241         GOFormat const *fmt = gnm_style_get_format (gnm_cell_get_style (cell));
1242         const GODateConventions *date_conv =
1243                 workbook_date_conv (cell->base.sheet->workbook);
1244
1245         if (!go_format_is_text (fmt) && *text == '=' && text[1] != 0) {
1246                 GnmExprParseFlags flags =
1247                         GNM_EXPR_PARSE_UNKNOWN_NAMES_ARE_INVALID;
1248                 const char *expr_start = text + 1;
1249                 GnmParsePos pos;
1250                 val = NULL;
1251                 parse_pos_init_cell (&pos, cell);
1252                 texpr = gnm_expr_parse_str (expr_start, &pos, flags,
1253                                             NULL, NULL);
1254         } else {
1255                 texpr = NULL;
1256                 val = format_match (text, fmt, date_conv);
1257         }
1258
1259         if (!val && !texpr)
1260                 val = value_new_string (text);
1261
1262         if (val)
1263                 gnm_cell_set_value (cell, val);
1264         else {
1265                 gnm_cell_set_expr (cell, texpr);
1266                 gnm_expr_top_unref (texpr);
1267         }
1268 }
1269
1270 static void
1271 stf_read_remember_settings (Workbook *book, StfParseOptions_t *po)
1272 {
1273         if (po->parsetype == PARSE_TYPE_CSV) {
1274                 GnmStfExport *stfe = gnm_stf_get_stfe (G_OBJECT (book));
1275                 char quote[6];
1276                 int length = g_unichar_to_utf8 (po->stringindicator, quote);
1277                 if (length > 5) {
1278                         quote[0] = '"';
1279                         quote[1] = '\0';
1280                 } else quote[length] = '\0';
1281
1282                 g_object_set (G_OBJECT (stfe), "separator", po->sep.chr, "quote", &quote, NULL);
1283
1284                 if ((po->terminator != NULL) &&  (po->terminator->data != NULL))
1285                         g_object_set (G_OBJECT (stfe), "eol", po->terminator->data, NULL);
1286         }
1287 }
1288
1289 gboolean
1290 stf_parse_sheet (StfParseOptions_t *parseoptions,
1291                  char const *data, char const *data_end,
1292                  Sheet *sheet, int start_col, int start_row)
1293 {
1294         int row;
1295         unsigned int lrow;
1296         GStringChunk *lines_chunk;
1297         GPtrArray *lines;
1298         gboolean result = TRUE;
1299         int col;
1300         unsigned int lcol;
1301         size_t nformats;
1302
1303         SETUP_LOCALE_SWITCH;
1304
1305         g_return_val_if_fail (parseoptions != NULL, FALSE);
1306         g_return_val_if_fail (data != NULL, FALSE);
1307         g_return_val_if_fail (IS_SHEET (sheet), FALSE);
1308
1309         if (!data_end)
1310                 data_end = data + strlen (data);
1311
1312         lines_chunk = g_string_chunk_new (100 * 1024);
1313         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1314         if (lines == NULL)
1315                 result = FALSE;
1316
1317         col = start_col;
1318         nformats = parseoptions->formats->len;
1319         for (lcol = 0; lcol < nformats; lcol++) {
1320                 GOFormat const *fmt = g_ptr_array_index (parseoptions->formats, lcol);
1321                 GnmStyle *mstyle;
1322                 gboolean want_col =
1323                         (parseoptions->col_import_array == NULL ||
1324                          parseoptions->col_import_array_len <= lcol ||
1325                          parseoptions->col_import_array[lcol]);
1326                 if (!want_col || col >= gnm_sheet_get_max_cols (sheet))
1327                         continue;
1328
1329                 if (fmt && !go_format_is_general (fmt)) {
1330                         GnmRange r;
1331                         int end_row = MIN (start_row + (int)lines->len - 1,
1332                                            gnm_sheet_get_last_row (sheet));
1333
1334                         range_init (&r, col, start_row, col, end_row);
1335                         mstyle = gnm_style_new ();
1336                         gnm_style_set_format (mstyle, fmt);
1337                         sheet_apply_style (sheet, &r, mstyle);
1338                 }
1339                 col++;
1340         }
1341
1342         START_LOCALE_SWITCH;
1343         for (row = start_row, lrow = 0;
1344              result && lrow < lines->len;
1345              row++, lrow++) {
1346                 GPtrArray *line;
1347
1348                 if (row >= gnm_sheet_get_max_rows (sheet)) {
1349                         if (!parseoptions->rows_exceeded) {
1350                                 /* FIXME: What locale?  */
1351                                 g_warning (_("There are more rows of data than "
1352                                              "there is room for in the sheet.  Extra "
1353                                              "rows will be ignored."));
1354                                 parseoptions->rows_exceeded = TRUE;
1355                         }
1356                         break;
1357                 }
1358
1359                 col = start_col;
1360                 line = g_ptr_array_index (lines, lrow);
1361
1362                 for (lcol = 0; lcol < line->len; lcol++) {
1363                         GOFormat const *fmt = lcol < nformats
1364                                 ? g_ptr_array_index (parseoptions->formats, lcol)
1365                                 : go_format_general ();
1366                         char const *text = g_ptr_array_index (line, lcol);
1367                         gboolean want_col =
1368                                 (parseoptions->col_import_array == NULL ||
1369                                  parseoptions->col_import_array_len <= lcol ||
1370                                  parseoptions->col_import_array[lcol]);
1371                         if (!want_col)
1372                                 continue;
1373
1374                         if (col >= gnm_sheet_get_max_cols (sheet)) {
1375                                 if (!parseoptions->cols_exceeded) {
1376                                         /* FIXME: What locale?  */
1377                                         g_warning (_("There are more columns of data than "
1378                                                      "there is room for in the sheet.  Extra "
1379                                                      "columns will be ignored."));
1380                                         parseoptions->cols_exceeded = TRUE;
1381                                 }
1382                                 break;
1383                         }
1384                         if (text && *text) {
1385                                 GnmCell *cell = sheet_cell_fetch (sheet, col, row);
1386                                 if (!go_format_is_text (fmt) &&
1387                                     lcol < parseoptions->formats_decimal->len &&
1388                                     g_ptr_array_index (parseoptions->formats_decimal, lcol)) {
1389                                         GOFormatFamily fam;
1390                                         GnmValue *v = format_match_decimal_number_with_locale
1391                                                 (text, &fam,
1392                                                  g_ptr_array_index (parseoptions->formats_curr, lcol),
1393                                                  g_ptr_array_index (parseoptions->formats_thousand, lcol),
1394                                                  g_ptr_array_index (parseoptions->formats_decimal, lcol));
1395                                         if (!v)
1396                                                 v = value_new_string (text);
1397                                         sheet_cell_set_value (cell, v);
1398                                 } else {
1399
1400                                         stf_cell_set_text (cell, text);
1401                                 }
1402                         }
1403                         col++;
1404                 }
1405
1406                 g_ptr_array_index (lines, lrow) = NULL;
1407                 g_ptr_array_free (line, TRUE);
1408         }
1409         END_LOCALE_SWITCH;
1410
1411         for (lcol = 0, col = start_col;
1412              lcol < parseoptions->col_import_array_len  && col < gnm_sheet_get_max_cols (sheet);
1413              lcol++) {
1414                 if (parseoptions->col_import_array == NULL ||
1415                     parseoptions->col_import_array_len <= lcol ||
1416                     parseoptions->col_import_array[lcol]) {
1417                         if (parseoptions->col_autofit_array == NULL ||
1418                             parseoptions->col_autofit_array[lcol]) {
1419                                 ColRowIndexList *list = colrow_get_index_list (col, col, NULL);
1420                                 ColRowStateGroup  *state = colrow_set_sizes (sheet, TRUE, list, -1, 0, -1);
1421                                 colrow_index_list_destroy (list);
1422                                 g_slist_free (state);
1423                         }
1424                         col++;
1425                 }
1426         }
1427
1428         g_string_chunk_free (lines_chunk);
1429         if (lines)
1430                 stf_parse_general_free (lines);
1431         if (result)
1432                 stf_read_remember_settings (sheet->workbook, parseoptions);
1433         return result;
1434 }
1435
1436 GnmCellRegion *
1437 stf_parse_region (StfParseOptions_t *parseoptions, char const *data, char const *data_end,
1438                   Workbook const *wb)
1439 {
1440         static GODateConventions const default_conv = {FALSE};
1441         GODateConventions const *date_conv = wb ? workbook_date_conv (wb) : &default_conv;
1442
1443         GnmCellRegion *cr;
1444         unsigned int row, colhigh = 0;
1445         GStringChunk *lines_chunk;
1446         GPtrArray *lines;
1447         size_t nformats;
1448
1449         SETUP_LOCALE_SWITCH;
1450
1451         g_return_val_if_fail (parseoptions != NULL, NULL);
1452         g_return_val_if_fail (data != NULL, NULL);
1453
1454         START_LOCALE_SWITCH;
1455
1456         cr = gnm_cell_region_new (NULL);
1457
1458         if (!data_end)
1459                 data_end = data + strlen (data);
1460         lines_chunk = g_string_chunk_new (100 * 1024);
1461         lines = stf_parse_general (parseoptions, lines_chunk, data, data_end);
1462         nformats = parseoptions->formats->len;
1463         for (row = 0; row < lines->len; row++) {
1464                 GPtrArray *line = g_ptr_array_index (lines, row);
1465                 unsigned int col, targetcol = 0;
1466                 for (col = 0; col < line->len; col++) {
1467                         if (parseoptions->col_import_array == NULL ||
1468                             parseoptions->col_import_array_len <= col ||
1469                             parseoptions->col_import_array[col]) {
1470                                 const char *text = g_ptr_array_index (line, col);
1471                                 if (text) {
1472                                         GOFormat *fmt = NULL;
1473                                         GnmValue *v;
1474                                         GnmCellCopy *cc;
1475
1476                                         if (col < nformats)
1477                                                 fmt = g_ptr_array_index (parseoptions->formats, col);
1478                                         v = format_match (text, fmt, date_conv);
1479                                         if (!v)
1480                                                 v = value_new_string (text);
1481
1482                                         cc = gnm_cell_copy_new (cr, targetcol, row);
1483                                         cc->val  = v;
1484                                         cc->texpr = NULL;
1485                                         targetcol++;
1486                                         if (targetcol > colhigh)
1487                                                 colhigh = targetcol;
1488                                 }
1489                         }
1490                 }
1491         }
1492         stf_parse_general_free (lines);
1493         g_string_chunk_free (lines_chunk);
1494
1495         END_LOCALE_SWITCH;
1496
1497         cr->cols    = (colhigh > 0) ? colhigh : 1;
1498         cr->rows    = row;
1499
1500         return cr;
1501 }
1502
/*
 * qsort-style comparison function for ints, ascending order.
 *
 * Returns a negative, zero or positive value if *@a is less than, equal
 * to, or greater than *@b.  The values are compared rather than
 * subtracted so that operands far apart cannot overflow.
 */
static int
int_sort (void const *a, void const *b)
{
        int const lhs = *(int const *)a;
        int const rhs = *(int const *)b;

        return (lhs > rhs) - (lhs < rhs);
}
1508
1509 static int
1510 count_character (GPtrArray *lines, gunichar c, double quantile)
1511 {
1512         int *counts, res;
1513         unsigned int lno, cno;
1514
1515         if (lines->len == 0)
1516                 return 0;
1517
1518         counts = g_new (int, lines->len);
1519         for (lno = cno = 0; lno < lines->len; lno++) {
1520                 int count = 0;
1521                 GPtrArray *boxline = g_ptr_array_index (lines, lno);
1522                 char const *line = g_ptr_array_index (boxline, 0);
1523
1524                 /* Ignore empty lines.  */
1525                 if (*line == 0)
1526                         continue;
1527
1528                 while (*line) {
1529                         if (g_utf8_get_char (line) == c)
1530                                 count++;
1531                         line = g_utf8_next_char (line);
1532                 }
1533
1534                 counts[cno++] = count;
1535         }
1536
1537         if (cno == 0)
1538                 res = 0;
1539         else {
1540                 unsigned int qi = (unsigned int)ceil (quantile * cno);
1541                 qsort (counts, cno, sizeof (counts[0]), int_sort);
1542                 if (qi == cno)
1543                         qi--;
1544                 res = counts[qi];
1545         }
1546
1547         g_free (counts);
1548
1549         return res;
1550 }
1551
1552 static void
1553 dump_guessed_options (const StfParseOptions_t *res)
1554 {
1555         GSList *l;
1556         char ubuffer[6 + 1];
1557         unsigned ui;
1558
1559         g_printerr ("Guessed format:\n");
1560         switch (res->parsetype) {
1561         case PARSE_TYPE_CSV:
1562                 g_printerr ("  type = sep\n");
1563                 g_printerr ("  separator = %s\n",
1564                             res->sep.chr ? res->sep.chr : "(none)");
1565                 g_printerr ("    see two as one = %s\n",
1566                             res->sep.duplicates ? "yes" : "no");
1567                 break;
1568         case PARSE_TYPE_FIXED:
1569                 g_printerr ("  type = sep\n");
1570                 break;
1571         default:
1572                 ;
1573         }
1574         g_printerr ("  trim space = %d\n", res->trim_spaces);
1575
1576         ubuffer[g_unichar_to_utf8 (res->stringindicator, ubuffer)] = 0;
1577         g_printerr ("  string indicator = %s\n", ubuffer);
1578         g_printerr ("    see two as one = %s\n",
1579                     res->indicator_2x_is_single ? "yes" : "no");
1580
1581         g_printerr ("  line terminators =");
1582         for (l = res->terminator; l; l = l->next) {
1583                 const char *t = l->data;
1584                 if (strcmp (t, "\n") == 0)
1585                         g_printerr (" unix");
1586                 else if (strcmp (t, "\r") == 0)
1587                         g_printerr (" mac");
1588                 else if (strcmp (t, "\r\n") == 0)
1589                         g_printerr (" dos");
1590                 else
1591                         g_printerr (" other");
1592         }
1593         g_printerr ("\n");
1594
1595         for (ui = 0; ui < res->formats->len; ui++) {
1596                 GOFormat const *fmt = g_ptr_array_index (res->formats, ui);
1597                 const GString *decimal = ui < res->formats_decimal->len
1598                         ? g_ptr_array_index (res->formats_decimal, ui)
1599                         : NULL;
1600                 const GString *thousand = ui < res->formats_thousand->len
1601                         ? g_ptr_array_index (res->formats_thousand, ui)
1602                         : NULL;
1603
1604                 g_printerr ("  fmt.%d = %s\n", ui, go_format_as_XL (fmt));
1605                 if (decimal)
1606                         g_printerr ("  fmt.%d.dec = %s\n", ui, decimal->str);
1607                 if (thousand)
1608                         g_printerr ("  fmt.%d.thou = %s\n", ui, thousand->str);
1609         }
1610 }
1611
1612 /**
1613  * stf_parse_options_guess:
1614  * @data: the input data.
1615  *
1616  * Returns: (transfer full): the guessed options.
1617  **/
1618 StfParseOptions_t *
1619 stf_parse_options_guess (char const *data)
1620 {
1621         StfParseOptions_t *res;
1622         GStringChunk *lines_chunk;
1623         GPtrArray *lines;
1624         int tabcount;
1625         int sepcount;
1626         gunichar sepchar = go_locale_get_arg_sep ();
1627
1628         g_return_val_if_fail (data != NULL, NULL);
1629
1630         res = stf_parse_options_new ();
1631         lines_chunk = g_string_chunk_new (100 * 1024);
1632         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1633
1634         tabcount = count_character (lines, '\t', 0.2);
1635         sepcount = count_character (lines, sepchar, 0.2);
1636
1637         /* At least one tab per line and enough to separate every
1638            would-be sepchars.  */
1639         if (tabcount >= 1 && tabcount >= sepcount - 1)
1640                 stf_parse_options_csv_set_separators (res, "\t", NULL);
1641         else {
1642                 gunichar c;
1643
1644                 /*
1645                  * Try a few more or less likely characters and pick the first
1646                  * one that occurs on at least half the lines.
1647                  *
1648                  * The order is mostly random, although ' ' and '!' which
1649                  * could very easily occur in text are put last.
1650                  */
1651                 if (count_character (lines, (c = sepchar), 0.5) > 0 ||
1652                     count_character (lines, (c = go_locale_get_col_sep ()), 0.5) > 0 ||
1653                     count_character (lines, (c = ':'), 0.5) > 0 ||
1654                     count_character (lines, (c = ','), 0.5) > 0 ||
1655                     count_character (lines, (c = ';'), 0.5) > 0 ||
1656                     count_character (lines, (c = '|'), 0.5) > 0 ||
1657                     count_character (lines, (c = '!'), 0.5) > 0 ||
1658                     count_character (lines, (c = ' '), 0.5) > 0) {
1659                         char sep[7];
1660                         sep[g_unichar_to_utf8 (c, sep)] = 0;
1661                         if (c == ' ')
1662                                 strcat (sep, "\t");
1663                         stf_parse_options_csv_set_separators (res, sep, NULL);
1664                 }
1665         }
1666
1667         // For now, always separated:
1668         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1669
1670         switch (res->parsetype) {
1671         case PARSE_TYPE_CSV: {
1672                 gboolean dups =
1673                         res->sep.chr &&
1674                         strchr (res->sep.chr, ' ') != NULL;
1675                 gboolean trim =
1676                         res->sep.chr &&
1677                         strchr (res->sep.chr, ' ') != NULL;
1678
1679                 stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1680                 stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1681                 stf_parse_options_csv_set_duplicates (res, dups);
1682                 stf_parse_options_csv_set_trim_seps (res, trim);
1683
1684                 stf_parse_options_csv_set_stringindicator (res, '"');
1685                 break;
1686         }
1687
1688         case PARSE_TYPE_FIXED:
1689                 break;
1690
1691         default:
1692                 g_assert_not_reached ();
1693         }
1694
1695         stf_parse_general_free (lines);
1696         g_string_chunk_free (lines_chunk);
1697
1698         stf_parse_options_guess_formats (res, data);
1699
1700         if (gnm_debug_flag ("stf"))
1701                 dump_guessed_options (res);
1702
1703         return res;
1704 }
1705
1706 /**
1707  * stf_parse_options_guess_csv:
1708  * @data: the CSV input data.
1709  *
1710  * Returns: (transfer full): the guessed options.
1711  **/
1712 StfParseOptions_t *
1713 stf_parse_options_guess_csv (char const *data)
1714 {
1715         StfParseOptions_t *res;
1716         GStringChunk *lines_chunk;
1717         GPtrArray *lines;
1718         char *sep = NULL;
1719         char const *quoteline = NULL;
1720         int pass;
1721         gunichar stringind = '"';
1722
1723         g_return_val_if_fail (data != NULL, NULL);
1724
1725         res = stf_parse_options_new ();
1726         stf_parse_options_set_type (res, PARSE_TYPE_CSV);
1727         stf_parse_options_set_trim_spaces (res, TRIM_TYPE_LEFT | TRIM_TYPE_RIGHT);
1728         stf_parse_options_csv_set_indicator_2x_is_single (res, TRUE);
1729         stf_parse_options_csv_set_duplicates (res, FALSE);
1730         stf_parse_options_csv_set_trim_seps (res, FALSE);
1731         stf_parse_options_csv_set_stringindicator (res, stringind);
1732
1733         lines_chunk = g_string_chunk_new (100 * 1024);
1734         lines = stf_parse_lines (res, lines_chunk, data, 1000, FALSE);
1735
1736         /*
1737          * Find a line containing a quote; skip first line unless it is
1738          * the only one.  Prefer a line with the quote first.
1739          */
1740         for (pass = 1; !quoteline && pass <= 2; pass++) {
1741                 size_t lno;
1742                 for (lno = MIN (1, lines->len - 1);
1743                      !quoteline && lno < lines->len;
1744                      lno++) {
1745                         GPtrArray *boxline = g_ptr_array_index (lines, lno);
1746                         const char *line = g_ptr_array_index (boxline, 0);
1747                         switch (pass) {
1748                         case 1:
1749                                 if (g_utf8_get_char (line) == stringind)
1750                                         quoteline = line;
1751                                 break;
1752                         case 2:
1753                                 if (my_utf8_strchr (line, stringind))
1754                                         quoteline = line;
1755                                 break;
1756                         }
1757                 }
1758         }
1759
1760         if (quoteline) {
1761                 const char *p0 = my_utf8_strchr (quoteline, stringind);
1762                 const char *p = p0;
1763
1764                 do {
1765                         p = g_utf8_next_char (p);
1766                 } while (*p && g_utf8_get_char (p) != stringind);
1767                 if (*p) p = g_utf8_next_char (p);
1768                 while (*p && g_unichar_isspace (g_utf8_get_char (p)))
1769                         p = g_utf8_next_char (p);
1770                 if (*p) {
1771                         /* Use the character after the quote.  */
1772                         sep = g_strndup (p, g_utf8_next_char (p) - p);
1773                 } else {
1774                         /* Try to use character before the quote.  */
1775                         while (p0 > quoteline && !sep) {
1776                                 p = p0;
1777                                 p0 = g_utf8_prev_char (p0);
1778                                 if (!g_unichar_isspace (g_utf8_get_char (p0)))
1779                                         sep = g_strndup (p0, p - p0);
1780                         }
1781                 }
1782         }
1783
1784         if (!sep)
1785                 sep = g_strdup (",");
1786         stf_parse_options_csv_set_separators (res, sep, NULL);
1787         g_free (sep);
1788
1789         stf_parse_general_free (lines);
1790         g_string_chunk_free (lines_chunk);
1791
1792         stf_parse_options_guess_formats (res, data);
1793
1794         if (gnm_debug_flag ("stf"))
1795                 dump_guessed_options (res);
1796
1797         return res;
1798 }
1799
/*
 * Bit flags naming the interpretations that are still considered possible
 * for a column while guessing its format.
 */
typedef enum {
        /* Date orderings.  */
        STF_GUESS_DATE_DMY = 1 << 0,
        STF_GUESS_DATE_MDY = 1 << 1,
        STF_GUESS_DATE_YMD = 1 << 2,

        /* Numbers, by decimal separator.  */
        STF_GUESS_NUMBER_DEC_POINT = 1 << 4,
        STF_GUESS_NUMBER_DEC_COMMA = 1 << 5,
        STF_GUESS_NUMBER_DEC_EITHER = (STF_GUESS_NUMBER_DEC_POINT |
                                       STF_GUESS_NUMBER_DEC_COMMA),

        /* Every flag above.  */
        STF_GUESS_ALL = (STF_GUESS_DATE_DMY |
                         STF_GUESS_DATE_MDY |
                         STF_GUESS_DATE_YMD |
                         STF_GUESS_NUMBER_DEC_EITHER)
} StfGuessFormats;
1811
1812 static void
1813 do_check_date (const char *data, StfGuessFormats flag,
1814                gboolean mbd, gboolean ybm,
1815                unsigned *possible,
1816                GODateConventions const *date_conv)
1817 {
1818         GnmValue *v;
1819         gboolean this_mbd, this_ybm;
1820         int imbd;
1821
1822         if (!(*possible & flag))
1823                 return;
1824
1825         v = format_match_datetime (data, date_conv, mbd, TRUE, FALSE);
1826         if (!v || !VALUE_FMT (v))
1827                 goto fail;
1828
1829         imbd = go_format_month_before_day (VALUE_FMT (v));
1830         this_mbd = (imbd >= 1);
1831         this_ybm = (imbd == 2);
1832         if (mbd != this_mbd || ybm != this_ybm)
1833                 goto fail;
1834
1835         goto done;
1836
1837 fail:
1838         *possible &= ~flag;
1839 done:
1840         value_release (v);
1841 }
1842
1843
1844 static void
1845 do_check_number (const char *data, StfGuessFormats flag,
1846                  const GString *dec, const GString *thousand, const GString *curr,
1847                  unsigned *possible, int *decimals)
1848 {
1849         GnmValue *v;
1850         GOFormatFamily family;
1851         const char *pthou;
1852
1853         if (!(*possible & flag))
1854                 return;
1855
1856         v = format_match_decimal_number_with_locale (data, &family, curr, thousand, dec);
1857         if (!v)
1858                 goto fail;
1859
1860         if (*decimals != -2) {
1861                 const char *pdec = strstr (data, dec->str);
1862                 int this_decimals = 0;
1863                 if (pdec) {
1864                         pdec += dec->len;
1865                         while (g_ascii_isdigit (*pdec)) {
1866                                 pdec++;
1867                                 this_decimals++;
1868                         }
1869                 }
1870                 if (*decimals == -1)
1871                         *decimals = this_decimals;
1872                 else if (*decimals != this_decimals)
1873                         *decimals = -2;
1874         }
1875
1876         pthou = strstr (data, thousand->str);
1877         if (pthou) {
1878                 const char *p;
1879                 int digits = 0, nonzero_digits = 0;
1880                 for (p = data; p < pthou; p = g_utf8_next_char (p)) {
1881                         if (g_unichar_isdigit (g_utf8_get_char (p))) {
1882                                 digits++;
1883                                 if (*p != '0')
1884                                         nonzero_digits++;
1885                         }
1886                 }
1887                 // "-.222" implies that "." is not a thousands separator.
1888                 // "0.222" implies that "." is not a thousands separator.
1889                 // "12345,555" implies that "," is not a thousands separator.
1890                 if (nonzero_digits == 0 || digits > 3)
1891                         goto fail;
1892         }
1893
1894         goto done;
1895
1896 fail:
1897         *possible &= ~flag;
1898 done:
1899         value_release (v);
1900 }
1901
1902
1903 /**
1904  * stf_parse_options_guess_formats:
1905  * @data: the CSV input data.
1906  *
1907  * This function attempts to recognize data formats on a column-by-column
1908  * basis under the assumption that the data in a text file will generally
1909  * use the same data formats.
1910  *
1911  * This is useful because not all values give sufficient information by
1912  * themselves to tell what format the data is in.  For example, "1/2/2000"
1913  * is likely to be a date in year 2000, but it is not clear if it is in
1914  * January or February.  If another value in the same column is "31/1/1999"
1915  * then it is likely that the former date was in February.
1916  *
1917  * Likewise, a value of "123,456" could mean either 1.23456e5 or 1.23456e2.
1918  * A later value of "111,200.22" would clear up the confusion.
1919  *
1920  **/
1921 void
1922 stf_parse_options_guess_formats (StfParseOptions_t *po, char const *data)
1923 {
1924         GStringChunk *lines_chunk;
1925         GPtrArray *lines;
1926         unsigned lno, col, colcount, sline;
1927         GODateConventions const *date_conv = go_date_conv_from_str ("Lotus:1900");
1928         GString *s_comma = g_string_new (",");
1929         GString *s_dot = g_string_new (".");
1930         GString *s_dollar = g_string_new ("$");
1931         gboolean debug = gnm_debug_flag ("stf");
1932
1933         g_ptr_array_set_size (po->formats, 0);
1934         g_ptr_array_set_size (po->formats_decimal, 0);
1935         g_ptr_array_set_size (po->formats_thousand, 0);
1936         g_ptr_array_set_size (po->formats_curr, 0);
1937
1938         lines_chunk = g_string_chunk_new (100 * 1024);
1939         lines = stf_parse_general (po, lines_chunk, data, data + strlen (data));
1940
1941         colcount = 0;
1942         for (lno = 0; lno < lines->len; lno++) {
1943                 GPtrArray *line = g_ptr_array_index (lines, lno);
1944                 colcount = MAX (colcount, line->len);
1945         }
1946
1947         // Ignore first line unless it is the only one
1948         sline = MIN ((int)lines->len - 1, 1);
1949
1950         g_ptr_array_set_size (po->formats, colcount);
1951         g_ptr_array_set_size (po->formats_decimal, colcount);
1952         g_ptr_array_set_size (po->formats_thousand, colcount);
1953         g_ptr_array_set_size (po->formats_curr, colcount);
1954         for (col = 0; col < colcount; col++) {
1955                 unsigned possible = STF_GUESS_ALL;
1956                 GOFormat *fmt = NULL;
1957                 gboolean seen_dot = FALSE;
1958                 gboolean seen_comma = FALSE;
1959                 int decimals_if_point = -1; // -1: unset; -2: inconsistent; >=0: count
1960                 int decimals_if_comma = -1; // -1: unset; -2: inconsistent; >=0: count
1961
1962                 for (lno = sline; possible && lno < lines->len; lno++) {
1963                         GPtrArray *line = g_ptr_array_index (lines, lno);
1964                         const char *data = col < line->len ? g_ptr_array_index (line, col) : "";
1965                         unsigned prev_possible = possible;
1966
1967                         if (*data == 0 || data[0] == '\'')
1968                                 continue;
1969
1970                         do_check_date (data, STF_GUESS_DATE_DMY, FALSE, FALSE, &possible, date_conv);
1971                         do_check_date (data, STF_GUESS_DATE_MDY, TRUE, FALSE, &possible, date_conv);
1972                         do_check_date (data, STF_GUESS_DATE_YMD, TRUE, TRUE, &possible, date_conv);
1973
1974                         if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER) {
1975                                 const char *pdot = strstr (data, s_dot->str);
1976                                 const char *pcomma = strstr (data, s_comma->str);
1977                                 if (pdot && pcomma) {
1978                                         // Both -- last one is the decimal separator
1979                                         if (pdot > pcomma)
1980                                                 possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1981                                         else
1982                                                 possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1983                                 } else if (pdot && strstr (pdot + s_dot->len, s_dot->str)) {
1984                                         // Two dots so they are thousands separators
1985                                         possible &= ~STF_GUESS_NUMBER_DEC_POINT;
1986                                 } else if (pcomma && strstr (pcomma + s_comma->len, s_comma->str)) {
1987                                         // Two commas so they are thousands separators
1988                                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
1989                                 }
1990
1991                                 seen_dot = seen_dot || (pdot != 0);
1992                                 seen_comma = seen_comma || (pcomma != 0);
1993                         }
1994                         do_check_number (data, STF_GUESS_NUMBER_DEC_POINT,
1995                                          s_dot, s_comma, s_dollar,
1996                                          &possible, &decimals_if_point);
1997                         do_check_number (data, STF_GUESS_NUMBER_DEC_COMMA,
1998                                          s_comma, s_dot, s_dollar,
1999                                          &possible, &decimals_if_comma);
2000
2001                         if (possible != prev_possible && debug)
2002                                 g_printerr ("col=%d; after [%s] possible=0x%x\n", col, data, possible);
2003                 }
2004
2005                 if ((possible & STF_GUESS_NUMBER_DEC_EITHER) == STF_GUESS_NUMBER_DEC_EITHER &&
2006                     !seen_dot && !seen_comma) {
2007                         // It doesn't matter what the separators are
2008                         possible &= ~STF_GUESS_NUMBER_DEC_COMMA;
2009                 }
2010
2011                 switch (possible) {
2012                 case STF_GUESS_DATE_DMY:
2013                         fmt = go_format_new_from_XL ("d-mmm-yyyy");
2014                         break;
2015                 case STF_GUESS_DATE_MDY:
2016                         fmt = go_format_new_from_XL ("m/d/yyyy");
2017                         break;
2018                 case STF_GUESS_DATE_YMD:
2019                         fmt = go_format_new_from_XL ("yyyy-mm-dd");
2020                         break;
2021                 case STF_GUESS_NUMBER_DEC_POINT:
2022                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (".");
2023                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (",");
2024                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2025                         if (decimals_if_point > 0) {
2026                                 // Don't set format if decimals is zero
2027                                 GString *fmt_str = g_string_new (NULL);
2028                                 go_format_generate_number_str (fmt_str, 1, decimals_if_point, seen_comma, FALSE, FALSE, "", "");
2029                                 fmt = go_format_new_from_XL (fmt_str->str);
2030                                 g_string_free (fmt_str, TRUE);
2031                         }
2032                         break;
2033                 case STF_GUESS_NUMBER_DEC_COMMA:
2034                         g_ptr_array_index (po->formats_decimal, col) = g_string_new (",");
2035                         g_ptr_array_index (po->formats_thousand, col) = g_string_new (".");
2036                         g_ptr_array_index (po->formats_curr, col) = g_string_new (s_dollar->str);
2037                         if (decimals_if_comma > 0) {
2038                                 // Don't set format if decimals is zero
2039                                 GString *fmt_str = g_string_new (NULL);
2040                                 go_format_generate_number_str (fmt_str, 1, decimals_if_comma, seen_dot, FALSE, FALSE, "", "");
2041                                 fmt = go_format_new_from_XL (fmt_str->str);
2042                                 g_string_free (fmt_str, TRUE);
2043                         }
2044                         break;
2045                 default:
2046                         break;
2047                 }
2048
2049                 if (!fmt)
2050                         fmt = go_format_ref (go_format_general ());
2051                 g_ptr_array_index (po->formats, col) = fmt;
2052         }
2053
2054         stf_parse_general_free (lines);
2055         g_string_chunk_free (lines_chunk);
2056
2057         g_string_free (s_dot, TRUE);
2058         g_string_free (s_comma, TRUE);
2059         g_string_free (s_dollar, TRUE);
2060 }