/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * stf.c : Utilizes the stf-parse engine and the dialog-stf to provide a plug-in for
 *         importing text files with a structure (CSV/fixed width)
 *
 * Copyright (C) Almer. S. Tigelaar <almer@gnome.org>
 * Copyright (C) 1999-2009 Morten Welinder (terra@gnome.org)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <https://www.gnu.org/licenses/>.
 */
#include <gnumeric-config.h>
#include <glib/gi18n-lib.h>
#include "stf-export.h"

#include <goffice/goffice.h>
#include "sheet-view.h"
#include "sheet-style.h"
#include "command-context.h"
#include "workbook-view.h"
#include "dialog-stf.h"
#include "dialog-stf-export.h"
#include "gnm-format.h"
#include "selection.h"
#include "clipboard.h"
#include "parse-util.h"

#include <gsf/gsf-input.h>
#include <gsf/gsf-output.h>
#include <gsf/gsf-output-memory.h>
#include <gsf/gsf-utils.h>

#include <string.h>
63 stf_warning (GOIOContext
*context
, char const *msg
)
66 * Using go_cmd_context_error_import will destroy the
67 * successfully imported portion. We ought to have a
68 * way to issue a warning.
70 if (GNM_IS_WBC_GTK (context
->impl
))
72 (wbcg_toplevel (WBC_GTK (context
->impl
)),
76 g_warning ("%s", msg
);
82 * @filename: name of the file to open&read
84 * Will open filename, read the file into a g_alloced memory buffer
86 * NOTE : The returned buffer has to be g_freed by the calling routine.
88 * returns : a buffer containing the file contents
91 stf_open_and_read (G_GNUC_UNUSED GOIOContext
*context
, GsfInput
*input
, size_t *readsize
)
95 gsf_off_t size
= gsf_input_size (input
);
97 if (gsf_input_seek (input
, 0, G_SEEK_SET
))
100 *readsize
= (size_t) size
;
101 if ((gsf_off_t
) *readsize
!= size
) /* Check for overflow */
104 allocsize
= (gulong
) size
;
105 if ((gsf_off_t
) allocsize
!= size
) /* Check for overflow */
107 result
= g_try_malloc (allocsize
);
111 *((char *)result
+ *readsize
) = '\0';
113 if (*readsize
> 0 && gsf_input_read (input
, *readsize
, result
) == NULL
) {
114 g_warning ("gsf_input_read failed.");
122 stf_preparse (GOIOContext
*context
, GsfInput
*input
, size_t *data_len
)
126 data
= stf_open_and_read (context
, input
, data_len
);
130 go_cmd_context_error_import (GO_CMD_CONTEXT (context
),
131 _("Error while trying to read file"));
139 stf_store_results (DialogStfResult_t
*dialogresult
,
140 Sheet
*sheet
, int start_col
, int start_row
)
142 return stf_parse_sheet (dialogresult
->parseoptions
,
143 dialogresult
->text
, NULL
, sheet
,
144 start_col
, start_row
);
148 resize_columns (Sheet
*sheet
)
152 if (gnm_debug_flag ("stf"))
153 g_printerr ("Auto-fitting columns...\n");
155 /* If we have lots of rows, auto-fitting will take a very long
156 time. It is probably better to look at only, say, 1000 rows
158 range_init_full_sheet (&r
, sheet
);
159 r
.end
.row
= MIN (r
.end
.row
, 1000);
161 colrow_autofit (sheet
, &r
, TRUE
,
162 TRUE
, /* Ignore strings */
163 TRUE
, /* Don't shrink */
164 TRUE
, /* Don't shrink */
166 if (gnm_debug_flag ("stf"))
167 g_printerr ("Auto-fitting columns... done\n");
169 sheet_queue_respan (sheet
, 0, gnm_sheet_get_last_row (sheet
));
176 * @enc: encoding of file
177 * @context: command context
179 * @input: file to read from+convert
181 * Main routine, handles importing a file including all dialog mumbo-jumbo
184 stf_read_workbook (G_GNUC_UNUSED GOFileOpener
const *fo
, gchar
const *enc
,
185 GOIOContext
*context
, GoView
*view
, GsfInput
*input
)
187 DialogStfResult_t
*dialogresult
= NULL
;
188 char *name
, *nameutf8
= NULL
;
191 WorkbookView
*wbv
= GNM_WORKBOOK_VIEW (view
);
193 if (!GNM_IS_WBC_GTK (context
->impl
)) {
194 go_io_error_string (context
, _("This importer can only be used with a GUI."));
198 name
= g_path_get_basename (gsf_input_name (input
));
199 nameutf8
= g_filename_to_utf8 (name
, -1, NULL
, NULL
, NULL
);
202 g_warning ("Failed to convert filename to UTF-8. This shouldn't happen here.");
206 data
= stf_preparse (context
, input
, &data_len
);
210 dialogresult
= stf_dialog (WBC_GTK (context
->impl
),
211 enc
, FALSE
, NULL
, FALSE
,
212 nameutf8
, data
, data_len
);
213 if (dialogresult
!= NULL
) {
214 Workbook
*book
= wb_view_get_workbook (wbv
);
215 int cols
= dialogresult
->colcount
, rows
= dialogresult
->rowcount
;
218 gnm_sheet_suggest_size (&cols
, &rows
);
219 sheet
= sheet_new (book
, nameutf8
, cols
, rows
);
220 workbook_sheet_attach (book
, sheet
);
221 if (stf_store_results (dialogresult
, sheet
, 0, 0)) {
222 workbook_recalc_all (book
);
223 resize_columns (sheet
);
224 workbook_set_saveinfo
226 GO_FILE_FL_WRITE_ONLY
,
228 ("Gnumeric_stf:stf_assistant"));
230 /* the user has cancelled */
231 /* the caller should notice that we have no sheets */
232 workbook_sheet_delete (sheet
);
239 if (dialogresult
!= NULL
)
240 stf_dialog_result_free (dialogresult
);
244 cb_get_content (GnmCellIter
const *iter
, GsfOutput
*buf
)
248 if (NULL
!= (cell
= iter
->cell
)) {
250 if (gnm_cell_has_expr (cell
))
251 tmp
= gnm_expr_top_as_string (cell
->base
.texpr
,
252 &iter
->pp
, iter
->pp
.sheet
->convs
);
253 else if (VALUE_FMT (cell
->value
) != NULL
)
254 tmp
= format_value (NULL
, cell
->value
, -1,
255 workbook_date_conv (iter
->pp
.wb
));
257 tmp
= value_get_as_string (cell
->value
);
259 gsf_output_write (buf
, strlen (tmp
), tmp
);
262 gsf_output_write (buf
, 1, "\n");
268 * stf_text_to_columns:
269 * @wbc: The control making the request
272 * Main routine, handles importing a file including all dialog mumbo-jumbo
275 stf_text_to_columns (WorkbookControl
*wbc
, GOCmdContext
*cc
)
277 DialogStfResult_t
*dialogresult
= NULL
;
279 Sheet
*src_sheet
, *target_sheet
;
286 sv
= wb_control_cur_sheet_view (wbc
);
287 src_sheet
= sv_sheet (sv
);
288 src
= selection_first_range (sv
, cc
, _("Text to Columns"));
291 if (range_width (src
) > 1) {
292 go_cmd_context_error (cc
, g_error_new (go_error_invalid (), 0,
293 _("Only one column of input data can be parsed at a time")));
297 /* FIXME : how to do this cleanly ? */
298 if (!GNM_IS_WBC_GTK (wbc
))
301 #warning Add UI for this
302 target_sheet
= src_sheet
;
304 range_translate (&target
, target_sheet
, 1, 0);
306 buf
= gsf_output_memory_new ();
307 sheet_foreach_cell_in_range (src_sheet
, CELL_ITER_ALL
, src
,
308 (CellIterFunc
) &cb_get_content
, buf
);
310 gsf_output_close (buf
);
311 data
= gsf_output_memory_get_bytes (GSF_OUTPUT_MEMORY (buf
));
312 data_len
= (size_t)gsf_output_size (buf
);
314 go_cmd_context_error_import (GO_CMD_CONTEXT (cc
),
315 _("There is no data "
318 dialogresult
= stf_dialog (WBC_GTK (wbc
),
319 NULL
, FALSE
, NULL
, FALSE
,
320 _("Text to Columns"),
323 if (dialogresult
!= NULL
) {
324 GnmCellRegion
*cr
= stf_parse_region (dialogresult
->parseoptions
,
325 dialogresult
->text
, NULL
, target_sheet
->workbook
);
327 stf_dialog_result_attach_formats_to_cr (dialogresult
, cr
);
328 target
.end
.col
= target
.start
.col
+ cr
->cols
- 1;
329 target
.end
.row
= target
.start
.row
+ cr
->rows
- 1;
332 cmd_text_to_columns (wbc
, src
, src_sheet
,
333 &target
, target_sheet
, cr
))
334 go_cmd_context_error_import (GO_CMD_CONTEXT (cc
),
335 _("Error while trying to "
336 "parse data into sheet"));
337 stf_dialog_result_free (dialogresult
);
340 g_object_unref (buf
);
344 clear_stray_NULs (GOIOContext
*context
, GString
*utf8data
)
346 char *cpointer
, *endpointer
;
348 char const *valid_end
;
350 cpointer
= utf8data
->str
;
351 endpointer
= utf8data
->str
+ utf8data
->len
;
352 while (*cpointer
!= 0)
354 while (cpointer
!= endpointer
) {
357 while (*cpointer
!= 0)
360 if (null_chars
> 0) {
363 format
= ngettext ("The file contains %d NUL character. "
364 "It has been changed to a space.",
365 "The file contains %d NUL characters. "
366 "They have been changed to spaces.",
368 msg
= g_strdup_printf (format
, null_chars
);
369 stf_warning (context
, msg
);
373 if (!g_utf8_validate (utf8data
->str
, utf8data
->len
, &valid_end
)) {
374 g_string_truncate (utf8data
, valid_end
- utf8data
->str
);
375 stf_warning (context
, _("The file contains invalid UTF-8 encoded characters and has been truncated"));
380 * stf_read_workbook_auto_csvtab:
382 * @enc: optional encoding
383 * @context: command context
385 * @input: file to read from+convert
387 * Attempt to auto-detect CSV or tab-delimited file
390 stf_read_workbook_auto_csvtab (G_GNUC_UNUSED GOFileOpener
const *fo
, gchar
const *enc
,
391 GOIOContext
*context
,
392 GoView
*view
, GsfInput
*input
)
400 StfParseOptions_t
*po
;
403 GStringChunk
*lines_chunk
;
405 WorkbookView
*wbv
= GNM_WORKBOOK_VIEW (view
);
407 g_return_if_fail (context
!= NULL
);
408 g_return_if_fail (wbv
!= NULL
);
410 book
= wb_view_get_workbook (wbv
);
412 data
= stf_preparse (context
, input
, &data_len
);
416 enc
= go_guess_encoding (data
, data_len
, enc
, &utf8data
, NULL
);
420 go_cmd_context_error_import (GO_CMD_CONTEXT (context
),
421 _("That file is not in the given encoding."));
425 clear_stray_NULs (context
, utf8data
);
428 * Try to get the filename we're reading from. This is not a
431 gsfname
= gsf_input_name (input
);
434 const char *ext
= gsf_extension_pointer (gsfname
);
435 gboolean iscsv
= ext
&& strcasecmp (ext
, "csv") == 0;
437 po
= stf_parse_options_guess_csv (utf8data
->str
);
439 po
= stf_parse_options_guess (utf8data
->str
);
442 lines_chunk
= g_string_chunk_new (100 * 1024);
443 lines
= stf_parse_general (po
, lines_chunk
,
444 utf8data
->str
, utf8data
->str
+ utf8data
->len
);
447 for (i
= 0; i
< rows
; i
++) {
448 GPtrArray
*line
= g_ptr_array_index (lines
, i
);
449 cols
= MAX (cols
, (int)line
->len
);
451 gnm_sheet_suggest_size (&cols
, &rows
);
452 stf_parse_general_free (lines
);
453 g_string_chunk_free (lines_chunk
);
455 name
= g_path_get_basename (gsfname
);
456 sheet
= sheet_new (book
, name
, cols
, rows
);
458 workbook_sheet_attach (book
, sheet
);
460 if (stf_parse_sheet (po
, utf8data
->str
, NULL
, sheet
, 0, 0)) {
462 workbook_recalc_all (book
);
463 resize_columns (sheet
);
464 if (po
->cols_exceeded
|| po
->rows_exceeded
) {
465 stf_warning (context
,
466 _("Some data did not fit on the "
467 "sheet and was dropped."));
469 is_csv
= po
->sep
.chr
&& po
->sep
.chr
[0] == ',';
470 workbook_set_saveinfo
472 GO_FILE_FL_WRITE_ONLY
,
474 (is_csv
? "Gnumeric_stf:stf_csv" : "Gnumeric_stf:stf_assistant"));
476 workbook_sheet_delete (sheet
);
477 go_cmd_context_error_import (GO_CMD_CONTEXT (context
),
478 _("Parse error while trying to parse data into sheet"));
482 stf_parse_options_free (po
);
483 g_string_free (utf8data
, TRUE
);
/***********************************************************************************/
489 stf_write_csv (G_GNUC_UNUSED GOFileSaver
const *fs
, GOIOContext
*context
,
490 GoView
const *view
, GsfOutput
*output
)
493 GnmRangeRef
const *range
;
494 WorkbookView
*wbv
= GNM_WORKBOOK_VIEW (view
);
496 GnmStfExport
*config
= g_object_new
497 (GNM_STF_EXPORT_TYPE
,
499 "quoting-triggers", ", \t\n\"",
502 /* FIXME: this is crap in both branches of the "if". */
503 range
= g_object_get_data (G_OBJECT (wb_view_get_workbook (wbv
)), "ssconvert-range");
504 if (range
&& range
->a
.sheet
)
505 sheet
= range
->a
.sheet
;
507 sheet
= wb_view_cur_sheet (wbv
);
509 gnm_stf_export_options_sheet_list_add (config
, sheet
);
511 if (gnm_stf_export (config
) == FALSE
)
512 go_cmd_context_error_import (GO_CMD_CONTEXT (context
),
513 _("Error while trying to write CSV file"));
515 g_object_unref (config
);
519 csv_tsv_probe (GOFileOpener
const *fo
, GsfInput
*input
, GOFileProbeLevel pl
)
521 /* Rough and ready heuristic. If the first N bytes have no
522 * unprintable characters this may be text */
523 const gsf_off_t N
= 512;
525 if (pl
== GO_FILE_PROBE_CONTENT
) {
526 guint8
const *header
;
528 char const *enc
= NULL
;
529 GString
*header_utf8
;
533 if (gsf_input_seek (input
, 0, G_SEEK_SET
))
535 i
= gsf_input_remaining (input
);
537 /* If someone ships us an empty file, accept it only if
538 it has a proper name. */
540 return csv_tsv_probe (fo
, input
, GO_FILE_PROBE_FILE_NAME
);
543 if (NULL
== (header
= gsf_input_read (input
, i
, NULL
)))
546 enc
= go_guess_encoding (header
, i
, NULL
, &header_utf8
, NULL
);
550 for (p
= header_utf8
->str
; *p
; p
= g_utf8_next_char (p
)) {
551 gunichar uc
= g_utf8_get_char (p
);
552 /* isprint might not be true for these: */
553 if (uc
== '\n' || uc
== '\t' || uc
== '\r')
555 /* Also, ignore a byte-order mark which may be used to
556 * indicate UTF-8; see
557 * http://en.wikipedia.org/wiki/Byte_Order_Mark for
560 if (p
== header_utf8
->str
&& uc
== 0x0000FEFF) {
563 if (!g_unichar_isprint (uc
)) {
569 g_string_free (header_utf8
, TRUE
);
572 char const *name
= gsf_input_name (input
);
575 name
= gsf_extension_pointer (name
);
576 return (name
!= NULL
&&
577 (g_ascii_strcasecmp (name
, "csv") == 0 ||
578 g_ascii_strcasecmp (name
, "tsv") == 0 ||
579 g_ascii_strcasecmp (name
, "txt") == 0));
589 GSList
*suffixes
= go_slist_create (
594 GSList
*mimes
= go_slist_create (
595 g_strdup ("application/tab-separated-values"),
596 g_strdup ("text/comma-separated-values"),
597 g_strdup ("text/csv"),
598 g_strdup ("text/x-csv"),
599 g_strdup ("text/spreadsheet"),
600 g_strdup ("text/tab-separated-values"),
602 GSList
*mimes_txt
= go_slist_create (
603 g_strdup ("text/plain"),
604 g_strdup ("text/csv"),
605 g_strdup ("text/x-csv"),
606 g_strdup ("text/comma-separated-values"),
607 g_strdup ("text/tab-separated-values"),
610 GOFileOpener
*opener
;
612 opener
= go_file_opener_new_with_enc (
613 "Gnumeric_stf:stf_csvtab",
614 _("Comma or tab separated values (CSV/TSV)"),
616 csv_tsv_probe
, stf_read_workbook_auto_csvtab
);
617 go_file_opener_register (opener
, 0);
618 g_object_unref (opener
);
620 opener
= go_file_opener_new_with_enc (
621 "Gnumeric_stf:stf_assistant",
622 _("Text import (configurable)"),
624 NULL
, stf_read_workbook
);
625 g_object_set (G_OBJECT (opener
), "interactive-only", TRUE
, NULL
);
626 go_file_opener_register (opener
, 0);
627 g_object_unref (opener
);
629 saver
= gnm_stf_file_saver_create ("Gnumeric_stf:stf_assistant");
630 /* Unlike the opener, the saver doesn't require interaction. */
631 go_file_saver_register (saver
);
632 g_object_unref (saver
);
634 saver
= go_file_saver_new (
635 "Gnumeric_stf:stf_csv", "csv",
636 _("Comma separated values (CSV)"),
637 GO_FILE_FL_MANUAL_REMEMBER
, stf_write_csv
);
638 go_file_saver_set_save_scope (saver
, GO_FILE_SAVE_SHEET
);
639 go_file_saver_register (saver
);
640 g_object_unref (saver
);
/**
 * stf_shutdown: (skip)
 *
 * Unregisters the savers and openers with the ids
 * "Gnumeric_stf:stf_assistant", "Gnumeric_stf:stf_csv" and
 * "Gnumeric_stf:stf_csvtab".
 */
void
stf_shutdown (void)
{
	go_file_saver_unregister
		(go_file_saver_for_id ("Gnumeric_stf:stf_assistant"));
	go_file_saver_unregister
		(go_file_saver_for_id ("Gnumeric_stf:stf_csv"));

	go_file_opener_unregister
		(go_file_opener_for_id ("Gnumeric_stf:stf_csvtab"));
	go_file_opener_unregister
		(go_file_opener_for_id ("Gnumeric_stf:stf_assistant"));
}