1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2016 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/any-reader.h"
26 #include "data/case.h"
27 #include "data/casereader-provider.h"
28 #include "data/casereader.h"
29 #include "data/dictionary.h"
30 #include "data/file-handle-def.h"
31 #include "data/file-name.h"
32 #include "data/format.h"
33 #include "data/identifier.h"
34 #include "data/missing-values.h"
35 #include "data/value-labels.h"
36 #include "data/value.h"
37 #include "data/variable.h"
38 #include "libpspp/float-format.h"
39 #include "libpspp/i18n.h"
40 #include "libpspp/integer-format.h"
41 #include "libpspp/message.h"
42 #include "libpspp/misc.h"
43 #include "libpspp/pool.h"
44 #include "libpspp/str.h"
46 #include "gl/localcharset.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
52 #define _(msgid) gettext (msgid)
53 #define N_(msgid) (msgid)
63 struct pcp_dir_entry main
;
64 struct pcp_dir_entry variables
;
65 struct pcp_dir_entry labels
;
66 struct pcp_dir_entry data
;
69 struct pcp_main_header
71 char product
[63]; /* "PCSPSS SYSTEM FILE..." */
72 unsigned int nominal_case_size
; /* Number of var positions. */
73 char creation_date
[9]; /* "[m]m/dd/yy". */
74 char creation_time
[9]; /* "[H]H:MM:SS". */
75 char file_label
[65]; /* File label. */
76 unsigned int weight_index
; /* Index of weighting variable, 0 if none. */
88 struct fmt_spec format
;
94 struct pcp_value_label
*val_labs
;
98 struct pcp_value_label
104 /* System file reader. */
107 struct any_reader any_reader
;
109 /* Resource tracking. */
110 struct pool
*pool
; /* All system file state. */
113 unsigned int file_size
;
114 struct any_read_info info
;
115 struct pcp_directory directory
;
116 struct pcp_main_header header
;
117 struct pcp_var_record
*vars
;
121 struct file_handle
*fh
; /* File handle. */
122 struct fh_lock
*lock
; /* Mutual exclusion for file handle. */
123 FILE *file
; /* File stream. */
124 unsigned int pos
; /* Position in file. */
125 bool error
; /* I/O or corruption error? */
126 struct caseproto
*proto
; /* Format of output cases. */
129 unsigned int n_cases
; /* Number of cases */
130 const char *encoding
; /* String encoding. */
134 uint8_t opcodes
[8]; /* Current block of opcodes. */
135 size_t opcode_idx
; /* Next opcode to interpret, 8 if none left. */
136 bool corruption_warning
; /* Warned about possible corruption? */
139 static struct pcp_reader
*
140 pcp_reader_cast (const struct any_reader
*r_
)
142 assert (r_
->klass
== &pcp_file_reader_class
);
143 return UP_CAST (r_
, struct pcp_reader
, any_reader
);
146 static const struct casereader_class pcp_file_casereader_class
;
148 static bool pcp_close (struct any_reader
*);
150 static bool read_variables_record (struct pcp_reader
*);
152 static void pcp_msg (struct pcp_reader
*r
, off_t
, int class,
153 const char *format
, va_list args
)
154 PRINTF_FORMAT (4, 0);
155 static void pcp_warn (struct pcp_reader
*, off_t
, const char *, ...)
156 PRINTF_FORMAT (3, 4);
157 static void pcp_error (struct pcp_reader
*, off_t
, const char *, ...)
158 PRINTF_FORMAT (3, 4);
160 static bool read_bytes (struct pcp_reader
*, void *, size_t)
162 static int try_read_bytes (struct pcp_reader
*, void *, size_t)
164 static bool read_uint16 (struct pcp_reader
*, unsigned int *)
166 static bool read_uint32 (struct pcp_reader
*, unsigned int *)
168 static bool read_float (struct pcp_reader
*, double *)
170 static double parse_float (const uint8_t number
[8]);
171 static bool read_string (struct pcp_reader
*, char *, size_t)
173 static bool skip_bytes (struct pcp_reader
*, size_t) WARN_UNUSED_RESULT
;
175 static bool pcp_seek (struct pcp_reader
*, off_t
);
177 static bool pcp_is_sysmis(const uint8_t *);
179 /* Dictionary reader. */
181 static bool read_dictionary (struct pcp_reader
*);
182 static bool read_main_header (struct pcp_reader
*, struct pcp_main_header
*);
183 static void parse_header (struct pcp_reader
*,
184 const struct pcp_main_header
*,
185 struct any_read_info
*, struct dictionary
*);
186 static bool parse_variable_records (struct pcp_reader
*, struct dictionary
*,
187 struct pcp_var_record
*, size_t n
);
189 /* Tries to open FH for reading as an SPSS/PC+ system file. Returns a
190 pcp_reader if successful, otherwise NULL. */
191 static struct any_reader
*
192 pcp_open (struct file_handle
*fh
)
196 /* Create and initialize reader. */
197 struct pcp_reader
*r
= XZALLOC (struct pcp_reader
);
198 r
->any_reader
.klass
= &pcp_file_reader_class
;
199 r
->pool
= pool_create ();
200 pool_register (r
->pool
, free
, r
);
202 r
->opcode_idx
= sizeof r
->opcodes
;
204 /* TRANSLATORS: this fragment will be interpolated into
205 messages in fh_lock() that identify types of files. */
206 r
->lock
= fh_lock (fh
, FH_REF_FILE
, N_("SPSS/PC+ system file"),
212 r
->file
= fn_open (fh
, "rb");
215 msg (ME
, _("Error opening `%s' for reading as an SPSS/PC+ "
217 fh_get_file_name (r
->fh
), strerror (errno
));
221 /* Fetch file size. */
222 if (fstat (fileno (r
->file
), &s
))
224 pcp_error (r
, 0, _("%s: stat failed (%s)."),
225 fh_get_file_name (r
->fh
), strerror (errno
));
228 if (s
.st_size
> UINT_MAX
)
230 pcp_error (r
, 0, _("%s: file too large."), fh_get_file_name (r
->fh
));
233 r
->file_size
= s
.st_size
;
235 /* Read dictionary. */
236 if (!read_dictionary (r
))
239 if (!pcp_seek (r
, r
->directory
.data
.ofs
))
242 return &r
->any_reader
;
245 pcp_close (&r
->any_reader
);
/* Reads one directory entry from R into *DE: first the record's offset
   (DE->ofs), then its length (DE->len), each with read_uint32().  Reports an
   error through pcp_error() if the record described by the entry would not
   fit within R->file_size. */
250 pcp_read_dir_entry (struct pcp_reader
*r
, struct pcp_dir_entry
*de
)
252 if (!read_uint32 (r
, &de
->ofs
) || !read_uint32 (r
, &de
->len
))
/* Checking LEN against the file size first keeps the unsigned subtraction
   R->file_size - DE->len below from wrapping around. */
255 if (de
->len
> r
->file_size
|| de
->ofs
> r
->file_size
- de
->len
)
/* The two 4-byte fields of the entry have just been read, so R->pos - 8 is
   where the entry began.

   NOTE(review): the message text names the record length first ("%u-byte
   record") and the offset second ("starting at offset %u"), but the
   arguments below pass DE->ofs first and DE->len second, so the two numbers
   appear swapped in the diagnostic.  The arguments probably should be
   DE->len, DE->ofs -- confirm and fix. */
257 pcp_error (r
, r
->pos
- 8, _("Directory entry is for a %u-byte record "
258 "starting at offset %u but file is only "
260 de
->ofs
, de
->len
, r
->file_size
);
268 read_dictionary (struct pcp_reader
*r
)
270 unsigned int two
, zero
;
272 if (!read_uint32 (r
, &two
) || !read_uint32 (r
, &zero
))
274 if (two
!= 2 || zero
!= 0)
275 pcp_warn (r
, 0, _("Directory fields have unexpected values "
276 "(%u,%u)."), two
, zero
);
278 if (!pcp_read_dir_entry (r
, &r
->directory
.main
)
279 || !pcp_read_dir_entry (r
, &r
->directory
.variables
)
280 || !pcp_read_dir_entry (r
, &r
->directory
.labels
)
281 || !pcp_read_dir_entry (r
, &r
->directory
.data
))
284 if (!read_main_header (r
, &r
->header
))
287 read_variables_record (r
);
292 struct get_strings_aux
303 add_string__ (struct get_strings_aux
*aux
,
304 const char *string
, bool id
, char *title
)
306 if (aux
->n
>= aux
->allocated
)
308 aux
->allocated
= 2 * (aux
->allocated
+ 1);
309 aux
->titles
= pool_realloc (aux
->pool
, aux
->titles
,
310 aux
->allocated
* sizeof *aux
->titles
);
311 aux
->strings
= pool_realloc (aux
->pool
, aux
->strings
,
312 aux
->allocated
* sizeof *aux
->strings
);
313 aux
->ids
= pool_realloc (aux
->pool
, aux
->ids
,
314 aux
->allocated
* sizeof *aux
->ids
);
317 aux
->titles
[aux
->n
] = title
;
318 aux
->strings
[aux
->n
] = pool_strdup (aux
->pool
, string
);
319 aux
->ids
[aux
->n
] = id
;
323 static void PRINTF_FORMAT (3, 4)
324 add_string (struct get_strings_aux
*aux
,
325 const char *string
, const char *title
, ...)
329 va_start (args
, title
);
330 add_string__ (aux
, string
, false, pool_vasprintf (aux
->pool
, title
, args
));
334 static void PRINTF_FORMAT (3, 4)
335 add_id (struct get_strings_aux
*aux
, const char *id
, const char *title
, ...)
339 va_start (args
, title
);
340 add_string__ (aux
, id
, true, pool_vasprintf (aux
->pool
, title
, args
));
344 /* Retrieves significant string data from R in its raw format, to allow the
345 caller to try to detect the encoding in use.
347 Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
348 and *STRINGSP to an array of N elements allocated from POOL. For each I in
349 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
350 whatever encoding system file R uses. *IDSP[I] is true if *STRINGSP[I] must
351 be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
/* Collects the raw (not yet recoded) strings of R into AUX: variable names
   via add_id(), then variable labels and value labels via add_string(), then
   the creation date, creation time, product, and file label from the header.
   Variable records whose width is -1 are skipped.  The collected title and
   string arrays are handed back through *TITLESP and *STRINGSP. */
354 pcp_get_strings (const struct any_reader
*r_
, struct pool
*pool
,
355 char ***titlesp
, bool **idsp
, char ***stringsp
)
357 struct pcp_reader
*r
= pcp_reader_cast (r_
);
358 struct get_strings_aux aux
;
/* First pass: variable names, which must be valid identifiers. */
370 for (i
= 0; i
< r
->n_vars
; i
++)
371 if (r
->vars
[i
].width
!= -1)
372 add_id (&aux
, r
->vars
[i
].name
, _("Variable %zu"), ++var_idx
);
/* Second pass: free-form variable labels and value labels. */
375 for (i
= 0; i
< r
->n_vars
; i
++)
376 if (r
->vars
[i
].width
!= -1)
379 if (r
->vars
[i
].label
)
380 add_string (&aux
, r
->vars
[i
].label
, _("Variable %zu Label"),
/* NOTE(review): this loop runs once per value label (J indexes
   n_val_labs), yet every iteration passes r->vars[i].label, i.e. the
   variable's own label, rather than the J-th value label.  It is also not
   guarded by the null check used for the variable label just above.  The
   intended argument is presumably r->vars[i].val_labs[j].label -- confirm
   and fix. */
383 for (j
= 0; j
< r
->vars
[i
].n_val_labs
; j
++)
384 add_string (&aux
, r
->vars
[i
].label
,
385 _("Variable %zu Value Label %zu"), var_idx
, j
);
/* Header strings. */
388 add_string (&aux
, r
->header
.creation_date
, _("Creation Date"));
389 add_string (&aux
, r
->header
.creation_time
, _("Creation Time"));
390 add_string (&aux
, r
->header
.product
, _("Product"));
391 add_string (&aux
, r
->header
.file_label
, _("File Label"));
393 *titlesp
= aux
.titles
;
395 *stringsp
= aux
.strings
;
399 /* Decodes the dictionary read from R, saving it into *DICT. Character
400 strings in R are decoded using ENCODING, or using the locale encoding (with
401 a warning) if ENCODING is null.
403 If INFOP is non-null, then it receives additional info about the system
404 file, which the caller must eventually free with any_read_info_destroy()
405 when it is no longer needed.
407 This function consumes R. The caller must not use it again later, even to
408 destroy it with pcp_close(). */
409 static struct casereader
*
410 pcp_decode (struct any_reader
*r_
, const char *encoding
,
411 struct dictionary
**dictp
, struct any_read_info
*infop
)
413 struct pcp_reader
*r
= pcp_reader_cast (r_
);
414 struct dictionary
*dict
;
416 if (encoding
== NULL
)
418 encoding
= locale_charset ();
419 pcp_warn (r
, -1, _("Using default encoding %s to read this SPSS/PC+ "
420 "system file. For best results, specify an "
421 "encoding explicitly. Use SYSFILE INFO with "
422 "ENCODING=\"DETECT\" to analyze the possible "
427 dict
= dict_create (encoding
);
428 r
->encoding
= dict_get_encoding (dict
);
430 parse_header (r
, &r
->header
, &r
->info
, dict
);
431 if (!parse_variable_records (r
, dict
, r
->vars
, r
->n_vars
))
434 /* Create an index of dictionary variable widths for
435 pcp_read_case to use. We cannot use the `struct variable's
436 from the dictionary we created, because the caller owns the
437 dictionary and may destroy or modify its variables. */
438 r
->proto
= caseproto_ref_pool (dict_get_proto (dict
), r
->pool
);
444 memset (&r
->info
, 0, sizeof r
->info
);
447 return casereader_create_sequential
448 (NULL
, r
->proto
, r
->n_cases
, &pcp_file_casereader_class
, r
);
451 pcp_close (&r
->any_reader
);
457 /* Closes R, which should have been returned by pcp_open() but not already
458 closed with pcp_decode() or this function.
459 Returns true if an I/O error has occurred on READER, false
462 pcp_close (struct any_reader
*r_
)
464 struct pcp_reader
*r
= pcp_reader_cast (r_
);
469 if (fn_close (r
->fh
, r
->file
) == EOF
)
471 msg (ME
, _("Error closing system file `%s': %s."),
472 fh_get_file_name (r
->fh
), strerror (errno
));
478 any_read_info_destroy (&r
->info
);
483 pool_destroy (r
->pool
);
488 /* Destroys READER. */
490 pcp_file_casereader_destroy (struct casereader
*reader UNUSED
, void *r_
)
492 struct pcp_reader
*r
= r_
;
493 pcp_close (&r
->any_reader
);
496 /* Detects whether FILE is an SPSS/PC+ system file. Returns 1 if so, 0 if
497 not, and a negative errno value if there is an error reading FILE. */
499 pcp_detect (FILE *file
)
501 static const char signature
[4] = "SPSS";
502 char buf
[sizeof signature
];
504 if (fseek (file
, 0x104, SEEK_SET
))
507 if (fread (buf
, sizeof buf
, 1, file
) != 1)
508 return ferror (file
) ? -errno
: 0;
510 return !memcmp (buf
, signature
, sizeof buf
);
513 /* Reads the main header of the SPSS/PC+ system file into *HEADER and records
514 the case count and compression flag in R. parse_header() fills in the
515 any_read_info later, once the file's encoding is known. */
517 read_main_header (struct pcp_reader
*r
, struct pcp_main_header
*header
)
519 unsigned int base_ofs
= r
->directory
.main
.ofs
;
520 unsigned int zero0
, zero1
, zero2
, zero3
;
521 size_t min_values
, min_data_size
;
522 unsigned int one0
, one1
;
523 unsigned int compressed
;
524 unsigned int n_cases1
;
527 if (!pcp_seek (r
, base_ofs
))
530 if (r
->directory
.main
.len
< 0xb0)
532 pcp_error (r
, r
->pos
, _("This is not an SPSS/PC+ system file."));
535 else if (r
->directory
.main
.len
> 0xb0)
536 pcp_warn (r
, r
->pos
, _("Record 0 has unexpected length %u."),
537 r
->directory
.main
.len
);
539 if (!read_uint16 (r
, &one0
)
540 || !read_string (r
, header
->product
, sizeof header
->product
)
541 || !read_bytes (r
, sysmis
, sizeof sysmis
)
542 || !read_uint32 (r
, &zero0
)
543 || !read_uint32 (r
, &zero1
)
544 || !read_uint16 (r
, &one1
)
545 || !read_uint16 (r
, &compressed
)
546 || !read_uint16 (r
, &header
->nominal_case_size
)
547 || !read_uint16 (r
, &r
->n_cases
)
548 || !read_uint16 (r
, &header
->weight_index
)
549 || !read_uint16 (r
, &zero2
)
550 || !read_uint16 (r
, &n_cases1
)
551 || !read_uint16 (r
, &zero3
)
552 || !read_string (r
, header
->creation_date
, sizeof header
->creation_date
)
553 || !read_string (r
, header
->creation_time
, sizeof header
->creation_time
)
554 || !read_string (r
, header
->file_label
, sizeof header
->file_label
))
557 if (!pcp_is_sysmis (sysmis
))
559 double d
= parse_float (sysmis
);
560 pcp_warn (r
, base_ofs
, _("Record 0 specifies unexpected system missing "
561 "value %g (%a)."), d
, d
);
563 if (one0
!= 1 || one1
!= 1
564 || zero0
!= 0 || zero1
!= 0 || zero2
!= 0 || zero3
!= 0)
565 pcp_warn (r
, base_ofs
, _("Record 0 reserved fields have unexpected values "
566 "(%u,%u,%u,%u,%u,%u)."),
567 one0
, one1
, zero0
, zero1
, zero2
, zero3
);
568 if (n_cases1
!= r
->n_cases
)
569 pcp_warn (r
, base_ofs
, _("Record 0 case counts differ (%u versus %u)."),
570 r
->n_cases
, n_cases1
);
571 if (compressed
!= 0 && compressed
!= 1)
573 pcp_error (r
, base_ofs
, _("Invalid compression type %u."), compressed
);
577 r
->compressed
= compressed
!= 0;
579 min_values
= xtimes (header
->nominal_case_size
, r
->n_cases
);
580 min_data_size
= xtimes (compressed
? 1 : 8, min_values
);
581 if (r
->directory
.data
.len
< min_data_size
582 || size_overflow_p (min_data_size
))
584 pcp_warn (r
, base_ofs
, _("Record 0 claims %u cases with %u values per "
585 "case (requiring at least %zu bytes) but data "
586 "record is only %u bytes long."),
587 r
->n_cases
, header
->nominal_case_size
, min_data_size
,
588 r
->directory
.data
.len
);
596 read_value_labels (struct pcp_reader
*r
, struct pcp_var_record
*var
,
597 unsigned int start
, unsigned int end
)
599 size_t allocated_val_labs
= 0;
603 if (end
> r
->directory
.labels
.len
)
605 pcp_warn (r
, r
->pos
- 32,
606 _("Value labels claimed to end at offset %u in labels record "
607 "but labels record is only %u bytes."),
608 end
, r
->directory
.labels
.len
);
612 start
+= r
->directory
.labels
.ofs
;
613 end
+= r
->directory
.labels
.ofs
;
614 if (start
> end
|| end
> r
->file_size
)
616 pcp_warn (r
, r
->pos
- 32,
617 _("Value labels claimed to be at offset %u with length %u "
618 "but file size is only %u bytes."),
619 start
, end
- start
, r
->file_size
);
623 if (!pcp_seek (r
, start
))
626 while (r
->pos
< end
&& end
- r
->pos
> 8)
628 struct pcp_value_label
*vl
;
631 if (var
->n_val_labs
>= allocated_val_labs
)
632 var
->val_labs
= pool_2nrealloc (r
->pool
, var
->val_labs
,
634 sizeof *var
->val_labs
);
635 vl
= &var
->val_labs
[var
->n_val_labs
];
637 if (!read_bytes (r
, vl
->value
, sizeof vl
->value
)
638 || !read_bytes (r
, &len
, 1))
641 if (end
- r
->pos
< len
)
644 _("Value labels end with partial label (%u bytes left in "
645 "record, label length %"PRIu8
")."),
649 vl
->label
= pool_malloc (r
->pool
, len
+ 1);
650 if (!read_bytes (r
, vl
->label
, len
))
653 vl
->label
[len
] = '\0';
657 pcp_warn (r
, r
->pos
, _("%u leftover bytes following value labels."),
664 read_var_label (struct pcp_reader
*r
, struct pcp_var_record
*var
,
670 if (ofs
>= r
->directory
.labels
.len
)
672 pcp_warn (r
, r
->pos
- 32,
673 _("Variable label claimed to start at offset %u in labels "
674 "record but labels record is only %u bytes."),
675 ofs
, r
->directory
.labels
.len
);
679 if (!pcp_seek (r
, ofs
+ r
->directory
.labels
.ofs
) || !read_bytes (r
, &len
, 1))
682 if (len
>= r
->directory
.labels
.len
- ofs
)
684 pcp_warn (r
, r
->pos
- 1,
685 _("Variable label with length %u starting at offset %u in "
686 "labels record overruns end of %u-byte labels record."),
687 len
, ofs
+ 1, r
->directory
.labels
.len
);
691 var
->label
= pool_malloc (r
->pool
, len
+ 1);
692 var
->label
[len
] = '\0';
693 return read_bytes (r
, var
->label
, len
);
696 /* Reads the variables record (record 1) into R. */
698 read_variables_record (struct pcp_reader
*r
)
703 if (!pcp_seek (r
, r
->directory
.variables
.ofs
))
705 if (r
->directory
.variables
.len
!= r
->header
.nominal_case_size
* 32)
707 pcp_error (r
, r
->pos
, _("Record 1 has length %u (expected %u)."),
708 r
->directory
.variables
.len
, r
->header
.nominal_case_size
* 32);
712 r
->vars
= pool_calloc (r
->pool
,
713 r
->header
.nominal_case_size
, sizeof *r
->vars
);
715 for (i
= 0; i
< r
->header
.nominal_case_size
; i
++)
717 struct pcp_var_record
*var
= &r
->vars
[r
->n_vars
++];
718 unsigned int value_label_start
, value_label_end
;
719 unsigned int var_label_ofs
;
724 if (!read_uint32 (r
, &value_label_start
)
725 || !read_uint32 (r
, &value_label_end
)
726 || !read_uint32 (r
, &var_label_ofs
)
727 || !read_uint32 (r
, &format
)
728 || !read_string (r
, var
->name
, sizeof var
->name
)
729 || !read_bytes (r
, var
->missing
, sizeof var
->missing
))
732 var
->weight
= r
->header
.weight_index
&& i
== r
->header
.weight_index
- 1;
736 raw_type
= format
>> 16;
737 if (!fmt_from_io (raw_type
, &var
->format
.type
))
739 pcp_error (r
, var
->pos
, _("Variable %u has invalid type %"PRIu8
"."),
744 var
->format
.w
= (format
>> 8) & 0xff;
745 var
->format
.d
= format
& 0xff;
746 fmt_fix_output (&var
->format
);
747 var
->width
= fmt_var_width (var
->format
);
751 unsigned int save_pos
= r
->pos
;
752 if (!read_var_label (r
, var
, var_label_ofs
)
753 || !pcp_seek (r
, save_pos
))
757 if (value_label_end
> value_label_start
&& var
->width
<= 8)
759 unsigned int save_pos
= r
->pos
;
760 if (!read_value_labels (r
, var
, value_label_start
, value_label_end
)
761 || !pcp_seek (r
, save_pos
))
767 int extra
= DIV_RND_UP (var
->width
- 8, 8);
769 if (!skip_bytes (r
, 32 * extra
))
774 if (r
->header
.weight_index
&& !weighted
)
775 pcp_warn (r
, -1, _("Invalid weight index %u."), r
->header
.weight_index
);
781 recode_and_trim_string (struct pool
*pool
, const char *from
, const char *in
)
783 struct substring out
;
785 out
= recode_substring_pool ("UTF-8", from
, ss_cstr (in
), pool
);
786 ss_trim (&out
, ss_cstr (" "));
787 return ss_xstrdup (out
);
791 parse_header (struct pcp_reader
*r
, const struct pcp_main_header
*header
,
792 struct any_read_info
*info
, struct dictionary
*dict
)
794 const char *dict_encoding
= dict_get_encoding (dict
);
797 memset (info
, 0, sizeof *info
);
799 info
->integer_format
= INTEGER_LSB_FIRST
;
800 info
->float_format
= FLOAT_IEEE_DOUBLE_LE
;
801 info
->compression
= r
->compressed
? ANY_COMP_SIMPLE
: ANY_COMP_NONE
;
802 info
->n_cases
= r
->n_cases
;
804 /* Convert file label to UTF-8 and put it into DICT. */
805 label
= recode_and_trim_string (r
->pool
, dict_encoding
, header
->file_label
);
806 dict_set_label (dict
, label
);
809 /* Put creation date, time, and product in UTF-8 into INFO. */
810 info
->creation_date
= recode_and_trim_string (r
->pool
, dict_encoding
,
811 header
->creation_date
);
812 info
->creation_time
= recode_and_trim_string (r
->pool
, dict_encoding
,
813 header
->creation_time
);
814 info
->product
= recode_and_trim_string (r
->pool
, dict_encoding
,
818 /* Reads a variable (type 2) record from R and adds the
819 corresponding variable to DICT.
820 Also skips past additional variable records for long string
823 parse_variable_records (struct pcp_reader
*r
, struct dictionary
*dict
,
824 struct pcp_var_record
*var_recs
, size_t n_var_recs
)
826 const char *dict_encoding
= dict_get_encoding (dict
);
827 struct pcp_var_record
*rec
;
829 for (rec
= var_recs
; rec
< &var_recs
[n_var_recs
]; rec
++)
834 name
= recode_string_pool ("UTF-8", dict_encoding
,
835 rec
->name
, -1, r
->pool
);
836 name
[strcspn (name
, " ")] = '\0';
838 /* Drop system variables. */
839 rec
->drop
= name
[0] == '$';
842 value_init_pool (r
->pool
, &rec
->tmp
, rec
->width
);
846 if (!dict_id_is_valid (dict
, name
, DC_ORDINARY
))
848 pcp_error (r
, rec
->pos
, _("Invalid variable name `%s'."), name
);
852 struct variable
*var
= dict_create_var (dict
, name
, rec
->width
);
855 var
= dict_create_var_with_unique_name (dict
, name
, rec
->width
);
856 pcp_warn (r
, rec
->pos
, _("Renaming variable with duplicate name "
858 name
, var_get_name (var
));
863 dict_set_weight (dict
, var
);
865 pcp_warn (r
, rec
->pos
,
866 _("Cannot weight by string variable `%s'."), name
);
869 /* Set the short name the same as the long name. */
870 var_set_short_name (var
, 0, name
);
872 /* Get variable label, if any. */
877 utf8_label
= recode_string ("UTF-8", dict_encoding
, rec
->label
, -1);
878 var_set_label (var
, utf8_label
);
882 /* Add value labels. */
883 for (i
= 0; i
< rec
->n_val_labs
; i
++)
888 value_init (&value
, rec
->width
);
889 if (var_is_numeric (var
))
890 value
.f
= parse_float (rec
->val_labs
[i
].value
);
892 memcpy (value
.s
, rec
->val_labs
[i
].value
, rec
->width
);
894 utf8_label
= recode_string ("UTF-8", dict_encoding
,
895 rec
->val_labs
[i
].label
, -1);
896 var_add_value_label (var
, &value
, utf8_label
);
899 value_destroy (&value
, rec
->width
);
902 /* Set missing values. */
903 if (rec
->width
<= 8 && !pcp_is_sysmis (rec
->missing
))
905 int width
= var_get_width (var
);
906 struct missing_values mv
;
908 mv_init_pool (r
->pool
, &mv
, width
);
909 if (var_is_numeric (var
))
910 mv_add_num (&mv
, parse_float (rec
->missing
));
912 mv_add_str (&mv
, rec
->missing
, MIN (width
, 8));
913 var_set_missing_values (var
, &mv
);
917 var_set_both_formats (var
, rec
->format
);
925 static void read_error (struct casereader
*, const struct pcp_reader
*);
927 static bool read_case_number (struct pcp_reader
*, double *);
928 static int read_case_string (struct pcp_reader
*, uint8_t *, size_t);
929 static int read_opcode (struct pcp_reader
*);
930 static bool read_compressed_number (struct pcp_reader
*, double *);
931 static int read_compressed_string (struct pcp_reader
*, uint8_t *);
932 static int read_whole_strings (struct pcp_reader
*, uint8_t *, size_t);
934 /* Reads and returns one case from READER's file. Returns a null
935 pointer if not successful. */
936 static struct ccase
*
937 pcp_file_casereader_read (struct casereader
*reader
, void *r_
)
939 struct pcp_reader
*r
= r_
;
940 unsigned int start_pos
= r
->pos
;
945 if (r
->error
|| !r
->n_cases
)
949 c
= case_create (r
->proto
);
951 for (i
= 0; i
< r
->n_vars
; i
++)
953 struct pcp_var_record
*var
= &r
->vars
[i
];
954 union value
*v
= var
->drop
? &var
->tmp
: case_data_rw_idx (c
, case_idx
++);
957 retval
= read_case_number (r
, &v
->f
);
959 retval
= read_case_string (r
, v
->s
, var
->width
);
963 pcp_error (r
, r
->pos
, _("File ends in partial case."));
967 if (r
->pos
> r
->directory
.data
.ofs
+ r
->directory
.data
.len
)
969 pcp_error (r
, r
->pos
, _("Case beginning at offset 0x%08x extends past "
970 "end of data record at offset 0x%08x."),
971 start_pos
, r
->directory
.data
.ofs
+ r
->directory
.data
.len
);
978 read_error (reader
, r
);
983 /* Issues an error that an unspecified error occurred reading PCP, and
986 read_error (struct casereader
*r
, const struct pcp_reader
*pcp
)
988 msg (ME
, _("Error reading case from file %s."), fh_get_name (pcp
->fh
));
989 casereader_force_error (r
);
992 /* Reads a number from R and stores its value in *D.
993 If R is compressed, reads a compressed number;
994 otherwise, reads a number in the regular way.
995 Returns true if successful, false if end of file is
996 reached immediately. */
998 read_case_number (struct pcp_reader
*r
, double *d
)
1003 if (!try_read_bytes (r
, number
, sizeof number
))
1005 *d
= parse_float (number
);
1009 return read_compressed_number (r
, d
);
1012 /* Reads LENGTH string bytes from R into S. Always reads a multiple of 8
1013 bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
1014 discarded without being written to S. Reads compressed strings if R is
1015 compressed. Returns 1 if successful, 0 if end of file is reached
1016 immediately, or -1 for some kind of error. */
1018 read_case_string (struct pcp_reader
*r
, uint8_t *s
, size_t length
)
1020 size_t whole
= ROUND_DOWN (length
, 8);
1021 size_t partial
= length
% 8;
1025 int retval
= read_whole_strings (r
, s
, whole
);
1033 int retval
= read_whole_strings (r
, bounce
, sizeof bounce
);
1036 memcpy (s
+ whole
, bounce
, partial
);
1042 /* Reads and returns the next compression opcode from R. */
1044 read_opcode (struct pcp_reader
*r
)
1046 assert (r
->compressed
);
1047 if (r
->opcode_idx
>= sizeof r
->opcodes
)
1049 int retval
= try_read_bytes (r
, r
->opcodes
, sizeof r
->opcodes
);
1054 return r
->opcodes
[r
->opcode_idx
++];
1057 /* Reads a compressed number from R and stores its value in D.
1058 Returns true if successful, false if end of file is
1059 reached immediately. */
1061 read_compressed_number (struct pcp_reader
*r
, double *d
)
1063 int opcode
= read_opcode (r
);
1074 return read_float (r
, d
);
1077 *d
= opcode
- 105.0;
1082 /* Reads a compressed 8-byte string segment from R and stores it in DST. */
1084 read_compressed_string (struct pcp_reader
*r
, uint8_t *dst
)
1089 opcode
= read_opcode (r
);
1096 retval
= read_bytes (r
, dst
, 8);
1097 return retval
== 1 ? 1 : -1;
1100 if (!r
->corruption_warning
)
1102 r
->corruption_warning
= true;
1103 pcp_warn (r
, r
->pos
,
1104 _("Possible compressed data corruption: "
1105 "string contains compressed integer (opcode %d)."),
1108 memset (dst
, ' ', 8);
1113 /* Reads LENGTH string bytes from R into S. LENGTH must be a multiple of 8.
1114 Reads compressed strings if R is compressed. Returns 1 if successful, 0 if
1115 end of file is reached immediately, or -1 for some kind of error. */
1117 read_whole_strings (struct pcp_reader
*r
, uint8_t *s
, size_t length
)
1119 assert (length
% 8 == 0);
1121 return try_read_bytes (r
, s
, length
);
1126 for (ofs
= 0; ofs
< length
; ofs
+= 8)
1128 int retval
= read_compressed_string (r
, s
+ ofs
);
1138 /* Displays a corruption message. */
1140 pcp_msg (struct pcp_reader
*r
, off_t offset
,
1141 int class, const char *format
, va_list args
)
1144 ds_init_empty (&text
);
1146 ds_put_format (&text
, _("`%s' near offset 0x%llx: "),
1147 fh_get_file_name (r
->fh
), (long long int) offset
);
1149 ds_put_format (&text
, _("`%s': "), fh_get_file_name (r
->fh
));
1150 ds_put_vformat (&text
, format
, args
);
1152 struct msg
*m
= xmalloc (sizeof *m
);
1154 .category
= msg_class_to_category (class),
1155 .severity
= msg_class_to_severity (class),
1156 .text
= ds_steal_cstr (&text
),
1161 /* Displays a warning for offset OFFSET in the file. */
1163 pcp_warn (struct pcp_reader
*r
, off_t offset
, const char *format
, ...)
1167 va_start (args
, format
);
1168 pcp_msg (r
, offset
, MW
, format
, args
);
1172 /* Displays an error for offset OFFSET in the file,
1173 marks it as in an error state,
1174 and aborts reading it using longjmp. */
1176 pcp_error (struct pcp_reader
*r
, off_t offset
, const char *format
, ...)
1180 va_start (args
, format
);
1181 pcp_msg (r
, offset
, ME
, format
, args
);
1187 /* Reads N_BYTES bytes into BUF.
1188 Returns 1 if exactly N_BYTES bytes are successfully read.
1189 Returns -1 if an I/O error or a partial read occurs.
1190 Returns 0 for an immediate end-of-file and, if EOF_IS_OK is false, reports
1193 read_bytes_internal (struct pcp_reader
*r
, bool eof_is_ok
,
1194 void *buf
, size_t n_bytes
)
1196 size_t bytes_read
= fread (buf
, 1, n_bytes
, r
->file
);
1197 r
->pos
+= bytes_read
;
1198 if (bytes_read
== n_bytes
)
1200 else if (ferror (r
->file
))
1202 pcp_error (r
, r
->pos
, _("System error: %s."), strerror (errno
));
1205 else if (!eof_is_ok
|| bytes_read
!= 0)
1207 pcp_error (r
, r
->pos
, _("Unexpected end of file."));
1214 /* Reads N_BYTES bytes into BUF.
1215 Returns true if successful.
1216 Returns false upon I/O error or if end-of-file is encountered. */
1218 read_bytes (struct pcp_reader
*r
, void *buf
, size_t n_bytes
)
1220 return read_bytes_internal (r
, false, buf
, n_bytes
) == 1;
1223 /* Reads N_BYTES bytes into BUF.
1224 Returns 1 if exactly N_BYTES bytes are successfully read.
1225 Returns 0 if an immediate end-of-file is encountered.
1226 Returns -1 if an I/O error or a partial read occurs. */
1228 try_read_bytes (struct pcp_reader
*r
, void *buf
, size_t n_bytes
)
1230 return read_bytes_internal (r
, true, buf
, n_bytes
);
1233 /* Reads a 16-bit unsigned integer from R and stores its value in host format in
1234 *X. Returns true if successful, otherwise false. */
1236 read_uint16 (struct pcp_reader
*r
, unsigned int *x
)
1239 if (read_bytes (r
, integer
, sizeof integer
) != 1)
1241 *x
= integer_get (INTEGER_LSB_FIRST
, integer
, sizeof integer
);
1245 /* Reads a 32-bit unsigned integer from R and stores its value in host format in
1246 *X. Returns true if successful, otherwise false. */
1248 read_uint32 (struct pcp_reader
*r
, unsigned int *x
)
1251 if (read_bytes (r
, integer
, sizeof integer
) != 1)
1253 *x
= integer_get (INTEGER_LSB_FIRST
, integer
, sizeof integer
);
1257 /* Reads exactly SIZE - 1 bytes into BUFFER
1258 and stores a null byte into BUFFER[SIZE - 1]. */
1260 read_string (struct pcp_reader
*r
, char *buffer
, size_t size
)
1265 ok
= read_bytes (r
, buffer
, size
- 1);
1267 buffer
[size
- 1] = '\0';
1271 /* Skips BYTES bytes forward in R. */
1273 skip_bytes (struct pcp_reader
*r
, size_t bytes
)
1278 size_t chunk
= MIN (sizeof buffer
, bytes
);
1279 if (!read_bytes (r
, buffer
, chunk
))
1288 pcp_seek (struct pcp_reader
*r
, off_t offset
)
1290 if (fseeko (r
->file
, offset
, SEEK_SET
))
1292 pcp_error (r
, 0, _("%s: seek failed (%s)."),
1293 fh_get_file_name (r
->fh
), strerror (errno
));
1300 /* Reads a 64-bit floating-point number from R and stores its
1301 value in host format in *D. */
1303 read_float (struct pcp_reader
*r
, double *d
)
1307 if (!read_bytes (r
, number
, sizeof number
))
1311 *d
= parse_float (number
);
1317 parse_float (const uint8_t number
[8])
1319 return (pcp_is_sysmis (number
)
1321 : float_get_double (FLOAT_IEEE_DOUBLE_LE
, number
));
1325 pcp_is_sysmis(const uint8_t *p
)
1327 static const uint8_t sysmis
[8]
1328 = { 0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff };
1329 return !memcmp (p
, sysmis
, 8);
1332 static const struct casereader_class pcp_file_casereader_class
=
1334 pcp_file_casereader_read
,
1335 pcp_file_casereader_destroy
,
1340 const struct any_reader_class pcp_file_reader_class
=
1342 N_("SPSS/PC+ System File"),