1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/csv-file-writer.h"
29 #include "data/calendar.h"
30 #include "data/case.h"
31 #include "data/casewriter-provider.h"
32 #include "data/casewriter.h"
33 #include "data/data-out.h"
34 #include "data/dictionary.h"
35 #include "data/file-handle-def.h"
36 #include "data/format.h"
37 #include "data/make-file.h"
38 #include "data/missing-values.h"
39 #include "data/settings.h"
40 #include "data/value-labels.h"
41 #include "data/variable.h"
42 #include "libpspp/assertion.h"
43 #include "libpspp/i18n.h"
44 #include "libpspp/message.h"
45 #include "libpspp/str.h"
47 #include "gl/ftoastr.h"
48 #include "gl/minmax.h"
49 #include "gl/unlocked-io.h"
50 #include "gl/xalloc.h"
/* Translation helpers: _() expands to a gettext() call on MSGID, while N_()
   expands to MSGID itself with no call (used below for the "CSV file"
   fragment that is handed to fh_lock()). */
53 #define _(msgid) gettext (msgid)
54 #define N_(msgid) (msgid)
56 /* A variable in a CSV file.

   csv_writer_open() fills in one of these for each dictionary variable,
   and close_writer() destroys the `missing' and `val_labs' members. */
59 int width
; /* Variable width (0 to 32767), copied from var_get_width(). */
60 int case_index
; /* Index into case, later passed to case_data_idx(). */
61 struct fmt_spec format
; /* Print format, copied from var_get_print_format(). */
62 struct missing_values missing
; /* User-missing values, if recoding; otherwise set up by mv_init(). */
63 struct val_labs
*val_labs
; /* Value labels, if any and they are in use (a clone made by
     val_labs_clone()).  NOTE(review): the value stored when labels are
     not in use is not visible here; presumably NULL -- confirm. */
66 /* Comma-separated value (CSV) file writer.

   This is the auxiliary data handed to casewriter_create() by
   csv_writer_open() and received back as `w_' in the casewriter
   callbacks. */
69 struct file_handle
*fh
; /* File handle; used by close_writer() to name the file in messages. */
70 struct fh_lock
*lock
; /* Mutual exclusion for file, obtained from fh_lock(). */
71 FILE *file
; /* File stream, filled in by replace_file_start(). */
72 struct replace_file
*rf
; /* Ticket for replacing output file; committed or aborted in
     close_writer(). */
/* Writer options.  The members read through W in this file are
   `qualifier', `delimiter', `decimal', and `use_print_formats'.
   NOTE(review): presumably a copy of the OPTS given to csv_writer_open();
   the assignment is not visible here -- confirm. */
74 struct csv_writer_options opts
;
76 char *encoding
; /* Encoding used by variables (xstrdup'd from the dictionary and
     passed to data_out()). */
79 struct csv_var
*csv_vars
; /* Variables: array of N_CSV_VARS elements from xnmalloc(). */
80 size_t n_csv_vars
; /* Number of variables, taken from dict_get_n_vars(). */
/* Forward declarations for items defined further down in this file. */

/* Casewriter class passed to casewriter_create() by csv_writer_open(). */
83 static const struct casewriter_class csv_file_casewriter_class
;
/* Writes the line of variable names at the top of the file. */
85 static void write_var_names (struct csv_writer
*, const struct dictionary
*);
/* I/O error query and shutdown helpers. */
87 static bool write_error (const struct csv_writer
*);
88 static bool close_writer (struct csv_writer
*);
90 /* Opens the CSV file designated by file handle FH for writing cases from
91 dictionary DICT according to the given OPTS.
93 No reference to DICT is retained, so it may be modified or
94 destroyed at will after this function returns. */
/* On the path visible here, returns the casewriter built by
   casewriter_create() from DICT's case prototype, the CSV casewriter class,
   and the new writer W.
   NOTE(review): the failure paths (lock not obtained, file not created,
   write error) are not visible here; confirm what is returned and that W
   is cleaned up on them. */
96 csv_writer_open (struct file_handle
*fh
, const struct dictionary
*dict
,
97 const struct csv_writer_options
*opts
)
102 /* Create and initialize writer. */
103 w
= xmalloc (sizeof *w
);
/* Keep a private copy of the dictionary's encoding name. */
111 w
->encoding
= xstrdup (dict_get_encoding (dict
));
/* Allocate one csv_var per dictionary variable and fill each from the
   corresponding variable. */
113 w
->n_csv_vars
= dict_get_n_vars (dict
);
114 w
->csv_vars
= xnmalloc (w
->n_csv_vars
, sizeof *w
->csv_vars
);
115 for (i
= 0; i
< w
->n_csv_vars
; i
++)
117 const struct variable
*var
= dict_get_var (dict
, i
);
118 struct csv_var
*cv
= &w
->csv_vars
[i
];
120 cv
->width
= var_get_width (var
);
121 cv
->case_index
= var_get_case_index (var
);
123 cv
->format
= *var_get_print_format (var
);
/* When recoding user-missing values, copy the variable's missing values
   so csv_write_var() can detect them.  The mv_init() call that follows is
   presumably the alternative branch that leaves the set empty -- the
   `else' is not visible here; confirm. */
124 if (opts
->recode_user_missing
)
125 mv_copy (&cv
->missing
, var_get_missing_values (var
));
127 mv_init (&cv
->missing
, cv
->width
);
/* Value labels are cloned only when the caller asked for them. */
129 if (opts
->use_value_labels
)
130 cv
->val_labs
= val_labs_clone (var_get_value_labels (var
));
135 /* Open file handle as an exclusive writer. */
136 /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock()
137 that identify types of files. */
138 w
->lock
= fh_lock (fh
, FH_REF_FILE
, N_("CSV file"), FH_ACC_WRITE
, true);
142 /* Create the file on disk. */
/* Mode "w" opens the stream in text mode, which csv_output_buffer() relies
   on for new-line handling. */
143 w
->rf
= replace_file_start (fh
, "w", 0666, &w
->file
);
/* NOTE(review): presumably reached only when replace_file_start() fails;
   the guarding condition is not visible here. */
146 msg (ME
, _("Error opening `%s' for writing as a CSV file: %s."),
147 fh_get_file_name (fh
), strerror (errno
));
/* Optionally emit the header line of variable names. */
151 if (opts
->include_var_names
)
152 write_var_names (w
, dict
);
157 return casewriter_create (dict_get_proto (dict
),
158 &csv_file_casewriter_class
, w
);
/* Scans the LEN bytes starting at S for any byte that is W's qualifier,
   W's delimiter, a line feed, or a carriage return.  csv_output_buffer()
   uses the result to decide whether to wrap the field in qualifiers.
   NOTE(review): the return statements are not visible here; presumably
   true on the first match and false if none is found -- confirm. */
166 csv_field_needs_quoting (struct csv_writer
*w
, const char *s
, size_t len
)
170 for (p
= s
; p
< &s
[len
]; p
++)
171 if (*p
== w
->opts
.qualifier
|| *p
== w
->opts
.delimiter
172 || *p
== '\n' || *p
== '\r')
/* Writes the LEN bytes at S to W's file as one CSV field.

   If csv_field_needs_quoting() says so, the field is written between a
   pair of qualifier characters, and an extra qualifier is written in front
   of every qualifier byte inside the field.  The plain fwrite() of the
   whole buffer is presumably the branch for fields that need no quoting
   (the `else' is not visible here -- confirm). */
179 csv_output_buffer (struct csv_writer
*w
, const char *s
, size_t len
)
181 if (csv_field_needs_quoting (w
, s
, len
))
/* Opening qualifier. */
185 putc (w
->opts
.qualifier
, w
->file
);
186 for (p
= s
; p
< &s
[len
]; p
++)
188 /* We are writing the output file in text mode, so transform any
189 explicit CR-LF line breaks into LF only, to allow the C library to
190 use correct system-specific new-lines. */
/* NOTE(review): on the last byte of the field, p[1] reads s[len].  The
   callers in this file pass strlen()-measured strings, a string literal,
   or a trimmed view of a data_out() result, so that byte is presumably
   still inside a NUL-terminated string -- confirm for any other caller. */
191 if (*p
== '\r' && p
[1] == '\n')
/* Escape an embedded qualifier by doubling it. */
194 if (*p
== w
->opts
.qualifier
)
195 putc (w
->opts
.qualifier
, w
->file
);
/* Closing qualifier. */
198 putc (w
->opts
.qualifier
, w
->file
);
201 fwrite (s
, 1, len
, w
->file
);
/* Writes the null-terminated string S to W's file as one CSV field, by
   passing it and its strlen() to csv_output_buffer(). */
205 csv_output_string (struct csv_writer
*w
, const char *s
)
207 csv_output_buffer (w
, s
, strlen (s
));
/* Writes a line to W's file holding the name of each of the first
   W->n_csv_vars variables in dictionary D, each written as a CSV field,
   and ends the line with '\n'. */
211 write_var_names (struct csv_writer
*w
, const struct dictionary
*d
)
215 for (i
= 0; i
< w
->n_csv_vars
; i
++)
/* NOTE(review): presumably the delimiter is written only between fields
   (i.e. not before the first one); the guarding condition is not visible
   here -- confirm. */
218 putc (w
->opts
.delimiter
, w
->file
);
219 csv_output_string (w
, var_get_name (dict_get_var (d
, i
)));
221 putc ('\n', w
->file
);
/* Writes VALUE to W's file as one CSV field, formatted by data_out() with
   CV's print format, W's encoding, and the current format settings.

   Spaces are trimmed from the formatted text before it is written: for
   formats other than FMT_A, ss_trim() is applied.  The ss_rtrim() call is
   presumably the FMT_A alternative (the `else' is not visible here --
   confirm).
   NOTE(review): the release of the string returned by data_out() is not
   visible here; confirm it is freed. */
225 csv_output_format (struct csv_writer
*w
, const struct csv_var
*cv
,
226 const union value
*value
)
228 char *s
= data_out (value
, w
->encoding
, &cv
->format
,
229 settings_get_fmt_settings ());
/* SS is a view of S that the trim calls adjust. */
230 struct substring ss
= ss_cstr (s
);
231 if (cv
->format
.type
!= FMT_A
)
232 ss_trim (&ss
, ss_cstr (" "));
234 ss_rtrim (&ss
, ss_cstr (" "));
235 csv_output_buffer (w
, ss
.string
, ss
.length
);
/* Splits NUMBER into a calendar date and a remainder.

   NUMBER is divided by 60 * 60 * 24 (i.e. it is treated as a count of
   seconds and converted to days) and the quotient is handed to
   calendar_offset_to_gregorian(), which receives Y, M, and D as outputs
   along with a local `yd' that is not used in the lines visible here.

   Returns fmod (NUMBER, 60 * 60 * 24), the part of NUMBER left over after
   whole days.  As with any fmod() result, it has the sign of NUMBER. */
240 extract_date (double number
, int *y
, int *m
, int *d
)
244 calendar_offset_to_gregorian (number
/ 60. / 60. / 24., y
, m
, d
, &yd
);
245 return fmod (number
, 60. * 60. * 24.);
/* Breaks NUMBER, treated as a count of seconds, into hours and minutes.

   Stores the whole number of 3600-second hours in *H (a double) and the
   whole number of minutes in what is left in *M.  After the last line
   visible here, `number' holds the remainder below 60.
   NOTE(review): the assignment to *S is not visible here; presumably it
   receives the whole seconds of that remainder -- confirm. */
249 extract_time (double number
, double *H
, int *M
, int *S
)
251 *H
= floor (number
/ 60. / 60.);
/* Drop the whole hours. */
252 number
= fmod (number
, 60. * 60.);
254 *M
= floor (number
/ 60.);
/* Drop the whole minutes. */
255 number
= fmod (number
, 60.);
/* Writes VALUE for variable CV to W's file as one CSV field.

   The representation is chosen in this order:

   - The value label found by val_labs_find(), written as a string.
     NOTE(review): the test guarding this branch is not visible here;
     presumably it requires a non-null label -- confirm.

   - A single space, when CV has width 0 and VALUE is SYSMIS.

   - The print-format rendering from csv_output_format(), when
     W->opts.use_print_formats is set.

   - Otherwise, text built in the local buffer `s' according to
     CV->format.type and written with csv_output_string().
     NOTE(review): the `case' labels are not visible here, so which format
     types select which rendering below cannot be stated -- confirm against
     the full switch. */
261 csv_write_var__ (struct csv_writer
*w
, const struct csv_var
*cv
,
262 const union value
*value
)
266 label
= val_labs_find (cv
->val_labs
, value
);
268 csv_output_string (w
, label
);
269 else if (cv
->width
== 0 && value
->f
== SYSMIS
)
270 csv_output_buffer (w
, " ", 1);
271 else if (w
->opts
.use_print_formats
)
272 csv_output_format (w
, cv
, value
);
/* Scratch buffer for the hand-built renderings: at least 128 bytes and at
   least DBL_STRLEN_BOUND. */
275 char s
[MAX (DBL_STRLEN_BOUND
, 128)];
278 switch (cv
->format
.type
)
/* Plain number: convert with dtoastr(), then overwrite the first '.' or
   ',' in the result with the configured decimal character.
   NOTE(review): presumably the store through `cp' is guarded by a null
   check, since strpbrk() returns NULL when neither character occurs; the
   guard is not visible here -- confirm. */
302 dtoastr (s
, sizeof s
, 0, 0, value
->f
);
303 cp
= strpbrk (s
, ".,");
305 *cp
= w
->opts
.decimal
;
/* Date only: month/day/year, zero-padded to 2/2/4 digits. */
322 extract_date (value
->f
, &y
, &m
, &d
);
323 snprintf (s
, sizeof s
, "%02d/%02d/%04d", m
, d
, y
);
/* Date and time: the remainder returned by extract_date() is split into
   hours, minutes, and seconds by extract_time(). */
336 extract_time (extract_date (value
->f
, &y
, &m
, &d
), &H
, &M
, &S
);
337 snprintf (s
, sizeof s
, "%02d/%02d/%04d %02.0f:%02d:%02d",
/* Time only: the magnitude is split into H:M:S and a leading "-" is added
   for negative values. */
349 extract_time (fabs (value
->f
), &H
, &M
, &S
);
350 snprintf (s
, sizeof s
, "%s%02.0f:%02d:%02d",
351 value
->f
< 0 ? "-" : "", H
, M
, S
);
/* Some format types are rendered with their print format instead. */
357 csv_output_format (w
, cv
, value
);
360 case FMT_NUMBER_OF_FORMATS
:
/* Emit whatever the switch left in `s'. */
363 csv_output_string (w
, s
);
/* Writes VALUE for variable CV to W's file, recoding user-missing values.

   If mv_is_value_missing() classifies VALUE as MV_USER with respect to
   CV->missing, a temporary value of CV's width is initialized, set by
   value_set_missing(), written in place of VALUE, and destroyed.  The
   final call, which writes VALUE unchanged, is presumably the alternative
   branch (the `else' is not visible here -- confirm).

   csv_writer_open() copies the variable's missing values into CV->missing
   only when the recode_user_missing option is set. */
368 csv_write_var (struct csv_writer
*w
, const struct csv_var
*cv
,
369 const union value
*value
)
371 if (mv_is_value_missing (&cv
->missing
, value
) == MV_USER
)
375 value_init (&missing
, cv
->width
);
376 value_set_missing (&missing
, cv
->width
);
377 csv_write_var__ (w
, cv
, &missing
);
378 value_destroy (&missing
, cv
->width
);
381 csv_write_var__ (w
, cv
, value
);
/* Writes case C to W's file as one line: for each of W's variables, the
   value fetched from C at that variable's case index is written with
   csv_write_var(), and the line is ended with '\n'. */
385 csv_write_case (struct csv_writer
*w
, const struct ccase
*c
)
389 for (i
= 0; i
< w
->n_csv_vars
; i
++)
391 const struct csv_var
*cv
= &w
->csv_vars
[i
];
/* NOTE(review): presumably the delimiter is written only between fields
   (i.e. not before the first one); the guarding condition is not visible
   here -- confirm. */
394 putc (w
->opts
.delimiter
, w
->file
);
395 csv_write_var (w
, cv
, case_data_idx (c
, cv
->case_index
));
397 putc ('\n', w
->file
);
400 /* Writes case C to CSV file W.

   This is the write callback listed in csv_file_casewriter_class.  W_ is
   the struct csv_writer that csv_writer_open() passed to
   casewriter_create().  If the stream already has its error indicator set,
   the casewriter is forced into an error state.
   NOTE(review): presumably the case is not written on that path, and C is
   released before returning; neither is visible here -- confirm. */
402 csv_file_casewriter_write (struct casewriter
*writer
, void *w_
,
405 struct csv_writer
*w
= w_
;
407 if (ferror (w
->file
))
409 casewriter_force_error (writer
);
414 csv_write_case (w
, c
);
418 /* Destroys CSV file writer W.

   This is the destroy callback listed in csv_file_casewriter_class.  W_ is
   the struct csv_writer behind WRITER.  It is shut down with
   close_writer(); if that reports failure, WRITER is forced into an error
   state. */
420 csv_file_casewriter_destroy (struct casewriter
*writer
, void *w_
)
422 struct csv_writer
*w
= w_
;
423 if (!close_writer (w
))
424 casewriter_force_error (writer
);
427 /* Returns true if an I/O error has occurred on WRITER, false otherwise.

   This is the stream's ferror() indicator, so it reflects any error since
   the stream was opened or the indicator was last cleared, not only the
   most recent write. */
429 write_error (const struct csv_writer
*writer
)
431 return ferror (writer
->file
);
434 /* Closes a CSV file after we're done with it.
435 Returns true if successful, false if an I/O error occurred. */
/* Visible steps: close the stream, report an I/O error by file name,
   commit the replacement file if things went well or abort it otherwise,
   then destroy each variable's missing-value set and value labels.
   NOTE(review): the initialization and updates of `ok', any null checks on
   W or W->file, and the release of the lock, file handle, encoding, array,
   and W itself are not visible here -- confirm against the full body. */
437 close_writer (struct csv_writer
*w
)
/* fclose() flushes buffered output, so a failure here is a write failure. */
450 if (fclose (w
->file
) == EOF
)
/* NOTE(review): presumably issued only when `ok' is false; the guard is
   not visible here. */
454 msg (ME
, _("An I/O error occurred writing CSV file `%s'."),
455 fh_get_file_name (w
->fh
));
/* Keep the new file only on success; a failed commit or abort is itself
   treated as a condition to handle (the handling is not visible here). */
457 if (ok
? !replace_file_commit (w
->rf
) : !replace_file_abort (w
->rf
))
/* Release the per-variable resources set up by csv_writer_open(). */
466 for (i
= 0; i
< w
->n_csv_vars
; i
++)
468 struct csv_var
*cv
= &w
->csv_vars
[i
];
469 mv_destroy (&cv
->missing
);
470 val_labs_destroy (cv
->val_labs
);
479 /* CSV file writer casewriter class.

   The initializer lists the write callback followed by the destroy
   callback.  NOTE(review): any further members are not visible here. */
480 static const struct casewriter_class csv_file_casewriter_class
=
482 csv_file_casewriter_write
,
483 csv_file_casewriter_destroy
,