NPAR: Fix order of arguments to xnmalloc().
[pspp.git] / utilities / pspp-convert.c
blobcc7f668a2eff7959043a0358742eab289f1ee101
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2013, 2014, 2015, 2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include <errno.h>
20 #include <getopt.h>
21 #include <limits.h>
22 #include <stdlib.h>
23 #include <unistd.h>
25 #include "data/any-reader.h"
26 #include "data/case-map.h"
27 #include "data/casereader.h"
28 #include "data/casewriter.h"
29 #include "data/csv-file-writer.h"
30 #include "data/dictionary.h"
31 #include "data/encrypted-file.h"
32 #include "data/file-name.h"
33 #include "data/por-file-writer.h"
34 #include "data/settings.h"
35 #include "data/sys-file-writer.h"
36 #include "data/file-handle-def.h"
37 #include "language/command.h"
38 #include "language/lexer/lexer.h"
39 #include "language/lexer/variable-parser.h"
40 #include "libpspp/assertion.h"
41 #include "libpspp/cast.h"
42 #include "libpspp/i18n.h"
44 #include "gl/error.h"
45 #include "gl/getpass.h"
46 #include "gl/localcharset.h"
47 #include "gl/progname.h"
48 #include "gl/version-etc.h"
50 #include "gettext.h"
51 #define _(msgid) gettext (msgid)
/* Forward declarations for helpers that are defined after main(). */
static void usage (void);

/* The second and third parameters are file handles, not file names, so they
   carry the same names (IFH, OFH) as in the definition. */
static bool decrypt_file (struct encrypted_file *enc,
                          const struct file_handle *ifh,
                          const struct file_handle *ofh,
                          const char *password,
                          const char *alphabet, int max_length,
                          const char *password_list);
/* Stores the one byte that makes up ARG into *OUT.

   ARG must be exactly one byte long; otherwise an error message that names
   OPTION_NAME is reported through error() with status 1. */
static void
parse_character_option (const char *arg, const char *option_name, char *out)
{
  /* XXX support multibyte characters */
  bool single_byte = arg[0] != '\0' && arg[1] == '\0';
  if (!single_byte)
    error (1, 0, _("%s argument must be a single character"), option_name);

  *out = *arg;
}
73 static bool
74 parse_variables_option (const char *arg, struct dictionary *dict,
75 struct variable ***vars, size_t *n_vars)
77 struct lexer *lexer = lex_create ();
78 lex_append (lexer, lex_reader_for_string (arg, locale_charset ()));
79 lex_get (lexer);
81 bool ok = parse_variables (lexer, dict, vars, n_vars, 0);
82 if (ok && (lex_token (lexer) != T_STOP && lex_token (lexer) != T_ENDCMD))
84 lex_error (lexer, _("Syntax error expecting variable name."));
85 ok = false;
88 lex_destroy (lexer);
89 if (!ok)
91 free (*vars);
92 *vars = NULL;
93 *n_vars = 0;
95 return ok;
98 int
99 main (int argc, char *argv[])
101 const char *input_filename;
102 const char *output_filename;
104 long long int max_cases = LLONG_MAX;
105 const char *keep = NULL;
106 const char *drop = NULL;
107 struct dictionary *dict = NULL;
108 struct casereader *reader = NULL;
109 struct file_handle *input_fh = NULL;
110 const char *encoding = NULL;
111 struct encrypted_file *enc;
113 const char *output_format = NULL;
114 struct file_handle *output_fh = NULL;
115 struct casewriter *writer;
116 const char *password = NULL;
117 struct string alphabet = DS_EMPTY_INITIALIZER;
118 const char *password_list = NULL;
119 int length = 0;
121 struct csv_writer_options csv_opts = {
122 .include_var_names = true,
123 .decimal = '.',
124 .delimiter = 0, /* The default will be set later. */
125 .qualifier = '"',
128 long long int i;
130 set_program_name (argv[0]);
131 i18n_init ();
132 fh_init ();
133 settings_init ();
135 for (;;)
137 enum
139 OPT_PASSWORD_LIST = UCHAR_MAX + 1,
140 OPT_RECODE,
141 OPT_NO_VAR_NAMES,
142 OPT_LABELS,
143 OPT_PRINT_FORMATS,
144 OPT_DECIMAL,
145 OPT_DELIMITER,
146 OPT_QUALIFIER,
148 static const struct option long_options[] =
150 { "cases", required_argument, NULL, 'c' },
151 { "keep", required_argument, NULL, 'k' },
152 { "drop", required_argument, NULL, 'd' },
153 { "encoding", required_argument, NULL, 'e' },
155 { "recode", no_argument, NULL, OPT_RECODE },
156 { "no-var-names", no_argument, NULL, OPT_NO_VAR_NAMES },
157 { "labels", no_argument, NULL, OPT_LABELS },
158 { "print-formats", no_argument, NULL, OPT_PRINT_FORMATS },
159 { "decimal", required_argument, NULL, OPT_DECIMAL },
160 { "delimiter", required_argument, NULL, OPT_DELIMITER },
161 { "qualifier", required_argument, NULL, OPT_QUALIFIER },
163 { "password", required_argument, NULL, 'p' },
164 { "password-alphabet", required_argument, NULL, 'a' },
165 { "password-length", required_argument, NULL, 'l' },
166 { "password-list", required_argument, NULL, OPT_PASSWORD_LIST },
168 { "output-format", required_argument, NULL, 'O' },
170 { "help", no_argument, NULL, 'h' },
171 { "version", no_argument, NULL, 'v' },
172 { NULL, 0, NULL, 0 },
175 int c;
177 c = getopt_long (argc, argv, "c:k:d:e:p:a:l:O:hv", long_options, NULL);
178 if (c == -1)
179 break;
181 switch (c)
183 case 'c':
184 max_cases = strtoull (optarg, NULL, 0);
185 break;
187 case 'k':
188 keep = optarg;
189 break;
191 case 'd':
192 drop = optarg;
193 break;
195 case 'e':
196 encoding = optarg;
197 break;
199 case 'p':
200 password = optarg;
201 break;
203 case 'l':
204 length = atoi (optarg);
205 break;
207 case OPT_PASSWORD_LIST:
208 password_list = optarg;
209 break;
211 case OPT_RECODE:
212 csv_opts.recode_user_missing = true;
213 break;
215 case OPT_NO_VAR_NAMES:
216 csv_opts.include_var_names = false;
217 break;
219 case OPT_LABELS:
220 csv_opts.use_value_labels = true;
221 break;
223 case OPT_DECIMAL:
224 parse_character_option (optarg, "--decimal", &csv_opts.decimal);
225 break;
227 case OPT_DELIMITER:
228 parse_character_option (optarg, "--delimiter", &csv_opts.delimiter);
229 break;
231 case OPT_QUALIFIER:
232 parse_character_option (optarg, "--qualifier", &csv_opts.qualifier);
233 break;
235 case 'a':
236 for (const char *p = optarg; *p;)
237 if (p[1] == '-' && p[2] > p[0])
239 for (int ch = p[0]; ch <= p[2]; ch++)
240 ds_put_byte (&alphabet, ch);
241 p += 3;
243 else
244 ds_put_byte (&alphabet, *p++);
245 break;
247 case 'O':
248 output_format = optarg;
249 break;
251 case 'v':
252 version_etc (stdout, "pspp-convert", PACKAGE_NAME, PACKAGE_VERSION,
253 "Ben Pfaff", "John Darrington", NULL_SENTINEL);
254 exit (EXIT_SUCCESS);
256 case 'h':
257 usage ();
258 exit (EXIT_SUCCESS);
260 default:
261 goto error;
265 if (optind + 2 != argc)
266 error (1, 0, _("exactly two non-option arguments are required; "
267 "use --help for help"));
269 input_filename = argv[optind];
270 output_filename = argv[optind + 1];
271 input_fh = fh_create_file (NULL, input_filename, NULL, fh_default_properties ());
273 if (output_format == NULL)
275 const char *dot = strrchr (output_filename, '.');
276 if (dot == NULL)
277 error (1, 0, _("%s: cannot guess output format (use -O option)"),
278 output_filename);
280 output_format = dot + 1;
283 output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ());
284 if (encrypted_file_open (&enc, input_fh) > 0)
286 if (decrypt_file (enc, input_fh, output_fh, password,
287 ds_cstr (&alphabet), length, password_list))
288 goto exit;
289 else
290 goto error;
294 reader = any_reader_open_and_decode (input_fh, encoding, &dict, NULL);
295 if (reader == NULL)
296 goto error;
298 struct case_map_stage *stage = case_map_stage_create (dict);
299 if (keep)
301 struct variable **keep_vars;
302 size_t n_keep_vars;
303 if (!parse_variables_option (keep, dict, &keep_vars, &n_keep_vars))
304 goto error;
305 dict_reorder_vars (dict, keep_vars, n_keep_vars);
306 dict_delete_consecutive_vars (dict, n_keep_vars,
307 dict_get_n_vars (dict) - n_keep_vars);
308 free (keep_vars);
311 if (drop)
313 struct variable **drop_vars;
314 size_t n_drop_vars;
315 if (!parse_variables_option (drop, dict, &drop_vars, &n_drop_vars))
316 goto error;
317 dict_delete_vars (dict, drop_vars, n_drop_vars);
318 free (drop_vars);
321 reader = case_map_create_input_translator (
322 case_map_stage_to_case_map (stage), reader);
324 if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt"))
326 if (!csv_opts.delimiter)
327 csv_opts.delimiter = csv_opts.decimal == '.' ? ',' : ';';
328 writer = csv_writer_open (output_fh, dict, &csv_opts);
330 else if (!strcmp (output_format, "sav") || !strcmp (output_format, "sys"))
332 struct sfm_write_options options;
334 options = sfm_writer_default_options ();
335 writer = sfm_open_writer (output_fh, dict, options);
337 else if (!strcmp (output_format, "por"))
339 struct pfm_write_options options;
341 options = pfm_writer_default_options ();
342 writer = pfm_open_writer (output_fh, dict, options);
344 else
346 error (1, 0, _("%s: unknown output format (use -O option)"),
347 output_filename);
348 NOT_REACHED ();
350 if (!writer)
351 error (1, 0, _("%s: error opening output file"), output_filename);
353 for (i = 0; i < max_cases; i++)
355 struct ccase *c;
357 c = casereader_read (reader);
358 if (c == NULL)
359 break;
361 casewriter_write (writer, c);
364 if (!casereader_destroy (reader))
365 error (1, 0, _("%s: error reading input file"), input_filename);
366 if (!casewriter_destroy (writer))
367 error (1, 0, _("%s: error writing output file"), output_filename);
369 exit:
370 ds_destroy (&alphabet);
371 dict_unref (dict);
372 fh_unref (output_fh);
373 fh_unref (input_fh);
374 fh_done ();
375 i18n_done ();
377 return 0;
379 error:
380 casereader_destroy (reader);
381 ds_destroy (&alphabet);
382 dict_unref (dict);
383 fh_unref (output_fh);
384 fh_unref (input_fh);
385 fh_done ();
386 i18n_done ();
388 return 1;
391 static bool
392 decrypt_file (struct encrypted_file *enc,
393 const struct file_handle *ifh,
394 const struct file_handle *ofh,
395 const char *password,
396 const char *alphabet,
397 int max_length,
398 const char *password_list)
400 FILE *out;
401 int err;
402 const char *input_filename = fh_get_file_name (ifh);
403 const char *output_filename = fh_get_file_name (ofh);
405 if (password_list)
407 FILE *password_file;
408 if (!strcmp (password_list, "-"))
409 password_file = stdin;
410 else
412 password_file = fopen (password_list, "r");
413 if (!password_file)
414 error (1, errno, _("%s: error opening password file"),
415 password_list);
418 struct string pw = DS_EMPTY_INITIALIZER;
419 unsigned int target = 100000;
420 for (unsigned int i = 0; ; i++)
422 ds_clear (&pw);
423 if (!ds_read_line (&pw, password_file, SIZE_MAX))
425 if (isatty (STDOUT_FILENO))
427 putchar ('\r');
428 fflush (stdout);
430 error (1, 0, _("\n%s: password not in file"), password_list);
432 ds_chomp_byte (&pw, '\n');
434 if (i >= target)
436 target += 100000;
437 if (isatty (STDOUT_FILENO))
439 printf ("\r%u", i);
440 fflush (stdout);
444 if (encrypted_file_unlock__ (enc, ds_cstr (&pw)))
446 printf ("\npassword is: \"%s\"\n", ds_cstr (&pw));
447 password = ds_cstr (&pw);
448 break;
452 else if (alphabet[0] && max_length)
454 size_t alphabet_size = strlen (alphabet);
455 char *pw = xmalloc (max_length + 1);
456 int *indexes = xzalloc (max_length * sizeof *indexes);
458 for (int len = password ? strlen (password) : 0;
459 len <= max_length; len++)
461 if (password && len == strlen (password))
463 for (int i = 0; i < len; i++)
465 const char *p = strchr (alphabet, password[i]);
466 if (!p)
467 error (1, 0, _("%s: '%c' is not in alphabet"),
468 password, password[i]);
469 indexes[i] = p - alphabet;
470 pw[i] = *p;
473 else
475 memset (indexes, 0, len * sizeof *indexes);
476 for (int i = 0; i < len; i++)
477 pw[i] = alphabet[0];
479 pw[len] = '\0';
481 unsigned int target = 0;
482 for (unsigned int j = 0; ; j++)
484 if (j >= target)
486 target += 100000;
487 if (isatty (STDOUT_FILENO))
489 printf ("\rlength %d: %s", len, pw);
490 fflush (stdout);
493 if (encrypted_file_unlock__ (enc, pw))
495 printf ("\npassword is: \"%s\"\n", pw);
496 password = pw;
497 goto success;
500 int i;
501 for (i = 0; i < len; i++)
502 if (++indexes[i] < alphabet_size)
504 pw[i] = alphabet[indexes[i]];
505 break;
507 else
509 indexes[i] = 0;
510 pw[i] = alphabet[indexes[i]];
512 if (i == len)
513 break;
516 free (indexes);
517 free (pw);
519 success:;
521 else
523 if (password == NULL)
525 password = getpass ("password: ");
526 if (password == NULL)
527 return false;
530 if (!encrypted_file_unlock (enc, password))
531 error (1, 0, _("sorry, wrong password"));
534 out = fn_open (ofh, "wb");
535 if (out == NULL)
536 error (1, errno, ("%s: error opening output file"), output_filename);
538 for (;;)
540 uint8_t buffer[1024];
541 size_t n;
543 n = encrypted_file_read (enc, buffer, sizeof buffer);
544 if (n == 0)
545 break;
547 if (fwrite (buffer, 1, n, out) != n)
548 error (1, errno, ("%s: write error"), output_filename);
551 err = encrypted_file_close (enc);
552 if (err)
553 error (1, err, ("%s: read error"), input_filename);
555 if (fflush (out) == EOF)
556 error (1, errno, ("%s: write error"), output_filename);
557 fn_close (ofh, out);
559 return true;
562 static void
563 usage (void)
565 printf ("\
566 %s, a utility for converting SPSS data files to other formats.\n\
567 Usage: %s [OPTION]... INPUT OUTPUT\n\
568 where INPUT is an SPSS data file or encrypted syntax file\n\
569 and OUTPUT is the name of the desired output file.\n\
571 The desired format of OUTPUT is by default inferred from its extension:\n\
572 csv txt comma-separated value\n\
573 sav sys SPSS system file\n\
574 por SPSS portable file\n\
575 sps SPSS syntax file (encrypted syntax input files only)\n\
577 General options:\n\
578 -O, --output-format=FORMAT set specific output format, where FORMAT\n\
579 is one of the extensions listed above\n\
580 -e, --encoding=CHARSET override encoding of input data file\n\
581 -c MAXCASES limit number of cases to copy (default is all cases)\n\
582 -k, --keep=VAR... include only the given variables in output\n\
583 -d, --drop=VAR... drop the given variables from output\n\
584 CSV output options:\n\
585 --recode convert user-missing values to system-missing\n\
586 --no-var-names do not include variable names as first row\n\
587 --labels write value labels to output\n\
588 --print-formats honor variables' print formats\n\
589 --decimal=CHAR use CHAR as the decimal point (default: .)\n\
590 --delimiter=CHAR use CHAR to separate fields (default: ,)\n\
591 --qualifier=CHAR use CHAR to quote the delimiter (default: \")\n\
592 Password options (for used with encrypted files):\n\
593 -p PASSWORD individual password\n\
594 -a ALPHABET with -l, alphabet of passwords to try\n\
595 -l MAX-LENGTH with -a, maximum number of characters to try\n\
596 --password-list=FILE try all of the passwords in FILE (one per line)\n\
597 Other options:\n\
598 --help display this help and exit\n\
599 --version output version information and exit\n",
600 program_name, program_name);