Change how checking for missing values works.
[pspp.git] / src / data / csv-file-writer.c
blobc00a2cffed4df0a5a91e87b21f05ed8780d07fbe
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "data/csv-file-writer.h"
21 #include <ctype.h>
22 #include <errno.h>
23 #include <math.h>
24 #include <stdint.h>
25 #include <stdlib.h>
26 #include <sys/stat.h>
27 #include <time.h>
29 #include "data/calendar.h"
30 #include "data/case.h"
31 #include "data/casewriter-provider.h"
32 #include "data/casewriter.h"
33 #include "data/data-out.h"
34 #include "data/dictionary.h"
35 #include "data/file-handle-def.h"
36 #include "data/format.h"
37 #include "data/make-file.h"
38 #include "data/missing-values.h"
39 #include "data/settings.h"
40 #include "data/value-labels.h"
41 #include "data/variable.h"
42 #include "libpspp/assertion.h"
43 #include "libpspp/i18n.h"
44 #include "libpspp/message.h"
45 #include "libpspp/str.h"
47 #include "gl/ftoastr.h"
48 #include "gl/minmax.h"
49 #include "gl/unlocked-io.h"
50 #include "gl/xalloc.h"
52 #include "gettext.h"
53 #define _(msgid) gettext (msgid)
54 #define N_(msgid) (msgid)
56 /* A variable in a CSV file. */
57 struct csv_var
59 int width; /* Variable width (0 to 32767). */
60 int case_index; /* Index into case. */
61 struct fmt_spec format; /* Print format. */
62 struct missing_values missing; /* User-missing values, if recoding. */
63 struct val_labs *val_labs; /* Value labels, if any and they are in use. */
66 /* Comma-separated value (CSV) file writer. */
67 struct csv_writer
69 struct file_handle *fh; /* File handle. */
70 struct fh_lock *lock; /* Mutual exclusion for file. */
71 FILE *file; /* File stream. */
72 struct replace_file *rf; /* Ticket for replacing output file. */
74 struct csv_writer_options opts;
76 char *encoding; /* Encoding used by variables. */
78 /* Variables. */
79 struct csv_var *csv_vars; /* Variables. */
80 size_t n_csv_vars; /* Number of variables. */
83 static const struct casewriter_class csv_file_casewriter_class;
85 static void write_var_names (struct csv_writer *, const struct dictionary *);
87 static bool write_error (const struct csv_writer *);
88 static bool close_writer (struct csv_writer *);
90 /* Opens the CSV file designated by file handle FH for writing cases from
91 dictionary DICT according to the given OPTS.
93 No reference to D is retained, so it may be modified or
94 destroyed at will after this function returns. */
95 struct casewriter *
96 csv_writer_open (struct file_handle *fh, const struct dictionary *dict,
97 const struct csv_writer_options *opts)
99 struct csv_writer *w;
100 int i;
102 /* Create and initialize writer. */
103 w = xmalloc (sizeof *w);
104 w->fh = fh_ref (fh);
105 w->lock = NULL;
106 w->file = NULL;
107 w->rf = NULL;
109 w->opts = *opts;
111 w->encoding = xstrdup (dict_get_encoding (dict));
113 w->n_csv_vars = dict_get_n_vars (dict);
114 w->csv_vars = xnmalloc (w->n_csv_vars, sizeof *w->csv_vars);
115 for (i = 0; i < w->n_csv_vars; i++)
117 const struct variable *var = dict_get_var (dict, i);
118 struct csv_var *cv = &w->csv_vars[i];
120 cv->width = var_get_width (var);
121 cv->case_index = var_get_case_index (var);
123 cv->format = *var_get_print_format (var);
124 if (opts->recode_user_missing)
125 mv_copy (&cv->missing, var_get_missing_values (var));
126 else
127 mv_init (&cv->missing, cv->width);
129 if (opts->use_value_labels)
130 cv->val_labs = val_labs_clone (var_get_value_labels (var));
131 else
132 cv->val_labs = NULL;
135 /* Open file handle as an exclusive writer. */
136 /* TRANSLATORS: this fragment will be interpolated into messages in fh_lock()
137 that identify types of files. */
138 w->lock = fh_lock (fh, FH_REF_FILE, N_("CSV file"), FH_ACC_WRITE, true);
139 if (w->lock == NULL)
140 goto error;
142 /* Create the file on disk. */
143 w->rf = replace_file_start (fh, "w", 0666, &w->file);
144 if (w->rf == NULL)
146 msg (ME, _("Error opening `%s' for writing as a CSV file: %s."),
147 fh_get_file_name (fh), strerror (errno));
148 goto error;
151 if (opts->include_var_names)
152 write_var_names (w, dict);
154 if (write_error (w))
155 goto error;
157 return casewriter_create (dict_get_proto (dict),
158 &csv_file_casewriter_class, w);
160 error:
161 close_writer (w);
162 return NULL;
165 static bool
166 csv_field_needs_quoting (struct csv_writer *w, const char *s, size_t len)
168 const char *p;
170 for (p = s; p < &s[len]; p++)
171 if (*p == w->opts.qualifier || *p == w->opts.delimiter
172 || *p == '\n' || *p == '\r')
173 return true;
175 return false;
178 static void
179 csv_output_buffer (struct csv_writer *w, const char *s, size_t len)
181 if (csv_field_needs_quoting (w, s, len))
183 const char *p;
185 putc (w->opts.qualifier, w->file);
186 for (p = s; p < &s[len]; p++)
188 /* We are writing the output file in text mode, so transform any
189 explicit CR-LF line breaks into LF only, to allow the C library to
190 use correct system-specific new-lines. */
191 if (*p == '\r' && p[1] == '\n')
192 continue;
194 if (*p == w->opts.qualifier)
195 putc (w->opts.qualifier, w->file);
196 putc (*p, w->file);
198 putc (w->opts.qualifier, w->file);
200 else
201 fwrite (s, 1, len, w->file);
/* Writes the null-terminated string S to W's file as a single CSV field, by
   passing it and its length to csv_output_buffer(). */
static void
csv_output_string (struct csv_writer *w, const char *s)
{
  size_t length = strlen (s);

  csv_output_buffer (w, s, length);
}
210 static void
211 write_var_names (struct csv_writer *w, const struct dictionary *d)
213 size_t i;
215 for (i = 0; i < w->n_csv_vars; i++)
217 if (i > 0)
218 putc (w->opts.delimiter, w->file);
219 csv_output_string (w, var_get_name (dict_get_var (d, i)));
221 putc ('\n', w->file);
224 static void
225 csv_output_format (struct csv_writer *w, const struct csv_var *cv,
226 const union value *value)
228 char *s = data_out (value, w->encoding, &cv->format,
229 settings_get_fmt_settings ());
230 struct substring ss = ss_cstr (s);
231 if (cv->format.type != FMT_A)
232 ss_trim (&ss, ss_cstr (" "));
233 else
234 ss_rtrim (&ss, ss_cstr (" "));
235 csv_output_buffer (w, ss.string, ss.length);
236 free (s);
/* Splits NUMBER, a count of seconds, into a date and a time of day.

   NUMBER divided down to days is handed to calendar_offset_to_gregorian(),
   which fills in *Y, *M, and *D.  The return value is the remainder of
   NUMBER divided by the number of seconds in a day. */
static double
extract_date (double number, int *y, int *m, int *d)
{
  const double seconds_per_day = 60. * 60. * 24.;
  int day_of_year;              /* Required by the callee but not used. */

  calendar_offset_to_gregorian (number / 60. / 60. / 24., y, m, d,
                                &day_of_year);

  return fmod (number, seconds_per_day);
}
248 static void
249 extract_time (double number, double *H, int *M, int *S)
251 *H = floor (number / 60. / 60.);
252 number = fmod (number, 60. * 60.);
254 *M = floor (number / 60.);
255 number = fmod (number, 60.);
257 *S = floor (number);
260 static void
261 csv_write_var__ (struct csv_writer *w, const struct csv_var *cv,
262 const union value *value)
264 const char *label;
266 label = val_labs_find (cv->val_labs, value);
267 if (label != NULL)
268 csv_output_string (w, label);
269 else if (cv->width == 0 && value->f == SYSMIS)
270 csv_output_buffer (w, " ", 1);
271 else if (w->opts.use_print_formats)
272 csv_output_format (w, cv, value);
273 else
275 char s[MAX (DBL_STRLEN_BOUND, 128)];
276 char *cp;
278 switch (cv->format.type)
280 case FMT_F:
281 case FMT_COMMA:
282 case FMT_DOT:
283 case FMT_DOLLAR:
284 case FMT_PCT:
285 case FMT_E:
286 case FMT_CCA:
287 case FMT_CCB:
288 case FMT_CCC:
289 case FMT_CCD:
290 case FMT_CCE:
291 case FMT_N:
292 case FMT_Z:
293 case FMT_P:
294 case FMT_PK:
295 case FMT_IB:
296 case FMT_PIB:
297 case FMT_PIBHEX:
298 case FMT_RB:
299 case FMT_RBHEX:
300 case FMT_WKDAY:
301 case FMT_MONTH:
302 dtoastr (s, sizeof s, 0, 0, value->f);
303 cp = strpbrk (s, ".,");
304 if (cp != NULL)
305 *cp = w->opts.decimal;
306 break;
308 case FMT_DATE:
309 case FMT_ADATE:
310 case FMT_EDATE:
311 case FMT_JDATE:
312 case FMT_SDATE:
313 case FMT_QYR:
314 case FMT_MOYR:
315 case FMT_WKYR:
316 if (value->f < 0)
317 strcpy (s, " ");
318 else
320 int y, m, d;
322 extract_date (value->f, &y, &m, &d);
323 snprintf (s, sizeof s, "%02d/%02d/%04d", m, d, y);
325 break;
327 case FMT_DATETIME:
328 case FMT_YMDHMS:
329 if (value->f < 0)
330 strcpy (s, " ");
331 else
333 int y, m, d, M, S;
334 double H;
336 extract_time (extract_date (value->f, &y, &m, &d), &H, &M, &S);
337 snprintf (s, sizeof s, "%02d/%02d/%04d %02.0f:%02d:%02d",
338 m, d, y, H, M, S);
340 break;
342 case FMT_MTIME:
343 case FMT_TIME:
344 case FMT_DTIME:
346 double H;
347 int M, S;
349 extract_time (fabs (value->f), &H, &M, &S);
350 snprintf (s, sizeof s, "%s%02.0f:%02d:%02d",
351 value->f < 0 ? "-" : "", H, M, S);
353 break;
355 case FMT_A:
356 case FMT_AHEX:
357 csv_output_format (w, cv, value);
358 return;
360 case FMT_NUMBER_OF_FORMATS:
361 NOT_REACHED ();
363 csv_output_string (w, s);
367 static void
368 csv_write_var (struct csv_writer *w, const struct csv_var *cv,
369 const union value *value)
371 if (mv_is_value_missing (&cv->missing, value) == MV_USER)
373 union value missing;
375 value_init (&missing, cv->width);
376 value_set_missing (&missing, cv->width);
377 csv_write_var__ (w, cv, &missing);
378 value_destroy (&missing, cv->width);
380 else
381 csv_write_var__ (w, cv, value);
384 static void
385 csv_write_case (struct csv_writer *w, const struct ccase *c)
387 size_t i;
389 for (i = 0; i < w->n_csv_vars; i++)
391 const struct csv_var *cv = &w->csv_vars[i];
393 if (i > 0)
394 putc (w->opts.delimiter, w->file);
395 csv_write_var (w, cv, case_data_idx (c, cv->case_index));
397 putc ('\n', w->file);
400 /* Writes case C to CSV file W. */
401 static void
402 csv_file_casewriter_write (struct casewriter *writer, void *w_,
403 struct ccase *c)
405 struct csv_writer *w = w_;
407 if (ferror (w->file))
409 casewriter_force_error (writer);
410 case_unref (c);
411 return;
414 csv_write_case (w, c);
415 case_unref (c);
/* Destroys the CSV file writer W_ by closing it.  If closing reports a
   failure, WRITER is forced into an error state. */
static void
csv_file_casewriter_destroy (struct casewriter *writer, void *w_)
{
  struct csv_writer *w = w_;
  bool closed_ok = close_writer (w);

  if (!closed_ok)
    casewriter_force_error (writer);
}
427 /* Returns true if an I/O error has occurred on WRITER, false otherwise. */
428 bool
429 write_error (const struct csv_writer *writer)
431 return ferror (writer->file);
434 /* Closes a CSV file after we're done with it.
435 Returns true if successful, false if an I/O error occurred. */
436 bool
437 close_writer (struct csv_writer *w)
439 size_t i;
440 bool ok;
442 if (w == NULL)
443 return true;
445 ok = true;
446 if (w->file != NULL)
448 if (write_error (w))
449 ok = false;
450 if (fclose (w->file) == EOF)
451 ok = false;
453 if (!ok)
454 msg (ME, _("An I/O error occurred writing CSV file `%s'."),
455 fh_get_file_name (w->fh));
457 if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
458 ok = false;
461 fh_unlock (w->lock);
462 fh_unref (w->fh);
464 free (w->encoding);
466 for (i = 0; i < w->n_csv_vars; i++)
468 struct csv_var *cv = &w->csv_vars[i];
469 mv_destroy (&cv->missing);
470 val_labs_destroy (cv->val_labs);
473 free (w->csv_vars);
474 free (w);
476 return ok;
479 /* CSV file writer casewriter class. */
480 static const struct casewriter_class csv_file_casewriter_class =
482 csv_file_casewriter_write,
483 csv_file_casewriter_destroy,
484 NULL,