Change how checking for missing values works.
[pspp.git] / src / data / missing-values.c
blob44d0bf9ee67315148224715f3ae4f083019c3636
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2005, 2009, 2011, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "data/missing-values.h"
21 #include <assert.h>
22 #include <stdlib.h>
24 #include "data/variable.h"
25 #include "libpspp/assertion.h"
26 #include "libpspp/cast.h"
27 #include "libpspp/i18n.h"
28 #include "libpspp/str.h"
30 #include "gl/minmax.h"
/* Kinds of user-missing value sets.

   The numeric encoding matters to the rest of this file: the low two
   bits (TYPE & 3) are used as the count of discrete values, and bit 2
   (value 4) is set when a range is present.

   Not for use by clients--go through the access functions instead. */
enum mv_type
  {
    MVT_NONE = 0,                       /* No user-missing values. */
    MVT_1 = 1,                          /* One discrete value. */
    MVT_2 = 2,                          /* Two discrete values. */
    MVT_3 = 3,                          /* Three discrete values. */
    MVT_RANGE = 4,                      /* A range of values. */
    MVT_RANGE_1 = MVT_RANGE | MVT_1     /* A range plus one discrete value. */
  };
44 /* Initializes MV as a set of missing values for a variable of
45 the given WIDTH. MV should be destroyed with mv_destroy when
46 it is no longer needed. */
47 void
48 mv_init (struct missing_values *mv, int width)
50 int i;
52 assert (width >= 0 && width <= MAX_STRING);
53 mv->type = MVT_NONE;
54 mv->width = width;
55 for (i = 0; i < 3; i++)
56 value_init (&mv->values[i], width);
59 /* Initializes MV as a set of missing values for a variable of
60 the given WIDTH. MV will be automatically destroyed along
61 with POOL; it must not be passed to mv_destroy for explicit
62 destruction. */
63 void
64 mv_init_pool (struct pool *pool, struct missing_values *mv, int width)
66 int i;
68 assert (width >= 0 && width <= MAX_STRING);
69 mv->type = MVT_NONE;
70 mv->width = width;
71 for (i = 0; i < 3; i++)
72 value_init_pool (pool, &mv->values[i], width);
75 /* Frees any storage allocated by mv_init for MV. */
76 void
77 mv_destroy (struct missing_values *mv)
79 if (mv != NULL)
81 int i;
83 for (i = 0; i < 3; i++)
84 value_destroy (&mv->values[i], mv->width);
88 /* Removes any missing values from MV. */
89 void
90 mv_clear (struct missing_values *mv)
92 mv->type = MVT_NONE;
95 /* Initializes MV as a copy of SRC. */
96 void
97 mv_copy (struct missing_values *mv, const struct missing_values *src)
99 int i;
101 mv_init (mv, src->width);
102 mv->type = src->type;
103 for (i = 0; i < 3; i++)
104 value_copy (&mv->values[i], &src->values[i], mv->width);
107 /* Returns true if VALUE, of the given WIDTH, may be added to a
108 missing value set also of the given WIDTH. This is normally
109 the case, but string missing values over MV_MAX_STRING bytes
110 long must consist solely of spaces after the first
111 MV_MAX_STRING bytes. */
112 bool
113 mv_is_acceptable (const union value *value, int width)
115 int i;
117 for (i = MV_MAX_STRING; i < width; i++)
118 if (value->s[i] != ' ')
119 return false;
120 return true;
123 /* Returns true if MV is an empty set of missing values. */
124 bool
125 mv_is_empty (const struct missing_values *mv)
127 return mv->type == MVT_NONE;
130 /* Returns the width of the missing values that MV may
131 contain. */
133 mv_get_width (const struct missing_values *mv)
135 return mv->width;
138 /* Attempts to add individual value V to the set of missing
139 values MV. Returns true if successful, false if MV has no
140 more room for missing values or if V is not an acceptable
141 missing value. */
142 bool
143 mv_add_value (struct missing_values *mv, const union value *v)
145 if (!mv_is_acceptable (v, mv->width))
146 return false;
148 switch (mv->type)
150 case MVT_NONE:
151 case MVT_1:
152 case MVT_2:
153 case MVT_RANGE:
154 value_copy (&mv->values[mv->type & 3], v, mv->width);
155 mv->type++;
156 return true;
158 case MVT_3:
159 case MVT_RANGE_1:
160 return false;
162 NOT_REACHED ();
165 /* Attempts to add S, which is LEN bytes long, to the set of string missing
166 values MV. Returns true if successful, false if MV has no more room for
167 missing values or if S is not an acceptable missing value. */
168 bool
169 mv_add_str (struct missing_values *mv, const uint8_t s[], size_t len)
171 union value v;
172 bool ok;
174 assert (mv->width > 0);
175 while (len > mv->width)
176 if (s[--len] != ' ')
177 return false;
179 value_init (&v, mv->width);
180 buf_copy_rpad (CHAR_CAST (char *, v.s), mv->width,
181 CHAR_CAST (char *, s), len, ' ');
182 ok = mv_add_value (mv, &v);
183 value_destroy (&v, mv->width);
185 return ok;
188 /* Attempts to add D to the set of numeric missing values MV.
189 Returns true if successful, false if MV has no more room for
190 missing values. */
191 bool
192 mv_add_num (struct missing_values *mv, double d)
194 union value v;
195 bool ok;
197 assert (mv->width == 0);
198 value_init (&v, 0);
199 v.f = d;
200 ok = mv_add_value (mv, &v);
201 value_destroy (&v, 0);
203 return ok;
206 /* Attempts to add range [LOW, HIGH] to the set of numeric
207 missing values MV. Returns true if successful, false if MV
208 has no room for a range, or if LOW > HIGH. */
209 bool
210 mv_add_range (struct missing_values *mv, double low, double high)
212 assert (mv->width == 0);
213 if (low <= high && (mv->type == MVT_NONE || mv->type == MVT_1))
215 mv->values[1].f = low;
216 mv->values[2].f = high;
217 mv->type |= 4;
218 return true;
220 else
221 return false;
/* Reports whether MV holds at least one discrete value.  A set that is
   empty or that holds only a range yields false. */
bool
mv_has_value (const struct missing_values *mv)
{
  int n_discrete = mv_n_values (mv);

  return n_discrete > 0;
}
232 /* Removes one individual value from MV and stores it in V, which
233 must have been initialized as a value with the same width as MV.
234 MV must contain an individual value (as determined by
235 mv_has_value()).
237 We remove the first value from MV, not the last, because the
238 common use for this function is in iterating through a set of
239 missing values. If we remove the last value then we'll output
240 the missing values in order opposite of that in which they
241 were added, so that a GET followed by a SAVE would reverse the
242 order of missing values in the system file, a weird effect. */
243 void
244 mv_pop_value (struct missing_values *mv, union value *v)
246 union value tmp;
248 assert (mv_has_value (mv));
250 value_copy (v, &mv->values[0], mv->width);
251 tmp = mv->values[0];
252 mv->values[0] = mv->values[1];
253 mv->values[1] = mv->values[2];
254 mv->values[2] = tmp;
255 mv->type--;
258 /* Returns MV's discrete value with index IDX. The caller must
259 not modify or free this value, or access it after MV is
260 modified or freed.
261 IDX must be less than the number of discrete values in MV, as
262 reported by mv_n_values. */
263 const union value *
264 mv_get_value (const struct missing_values *mv, int idx)
266 assert (idx >= 0 && idx < mv_n_values (mv));
267 return &mv->values[idx];
270 /* Replaces MV's discrete value with index IDX by a copy of V,
271 which must have the same width as MV.
272 IDX must be less than the number of discrete values in MV, as
273 reported by mv_n_values. */
274 bool
275 mv_replace_value (struct missing_values *mv, const union value *v, int idx)
277 assert (idx >= 0) ;
278 assert (idx < mv_n_values(mv));
280 if (!mv_is_acceptable (v, mv->width))
281 return false;
283 value_copy (&mv->values[idx], v, mv->width);
284 return true;
287 /* Returns the number of individual (not part of a range) missing
288 values in MV. */
290 mv_n_values (const struct missing_values *mv)
292 return mv->type & 3;
296 /* Returns true if MV contains a numeric range,
297 false if MV is empty (or contains only individual values). */
298 bool
299 mv_has_range (const struct missing_values *mv)
301 return mv->type == MVT_RANGE || mv->type == MVT_RANGE_1;
304 /* Removes the numeric range from MV and stores it in *LOW and
305 *HIGH. MV must contain a individual range (as determined by
306 mv_has_range()). */
307 void
308 mv_pop_range (struct missing_values *mv, double *low, double *high)
310 assert (mv_has_range (mv));
311 *low = mv->values[1].f;
312 *high = mv->values[2].f;
313 mv->type &= 3;
316 /* Returns the numeric range from MV into *LOW and
317 *HIGH. MV must contain a individual range (as determined by
318 mv_has_range()). */
319 void
320 mv_get_range (const struct missing_values *mv, double *low, double *high)
322 assert (mv_has_range (mv));
323 *low = mv->values[1].f;
324 *high = mv->values[2].f;
327 /* Returns true if values[IDX] is in use when the `type' member
328 is set to TYPE (in struct missing_values),
329 false otherwise. */
330 static bool
331 using_element (unsigned type, int idx)
333 assert (idx >= 0 && idx < 3);
335 switch (type)
337 case MVT_NONE:
338 return false;
339 case MVT_1:
340 return idx < 1;
341 case MVT_2:
342 return idx < 2;
343 case MVT_3:
344 return true;
345 case MVT_RANGE:
346 return idx > 0;
347 case MVT_RANGE_1:
348 return true;
350 NOT_REACHED ();
353 /* Returns true if MV can be resized to the given WIDTH with
354 mv_resize(), false otherwise. Resizing is possible only when
355 each value in MV (if any) is resizable from MV's current width
356 to WIDTH, as determined by value_is_resizable. */
357 bool
358 mv_is_resizable (const struct missing_values *mv, int width)
360 int i;
362 for (i = 0; i < 3; i++)
363 if (using_element (mv->type, i)
364 && !value_is_resizable (&mv->values[i], mv->width, width))
365 return false;
367 return true;
370 /* Resizes MV to the given WIDTH. WIDTH must fit the constraints
371 explained for mv_is_resizable. */
372 void
373 mv_resize (struct missing_values *mv, int width)
375 int i;
377 assert (mv_is_resizable (mv, width));
378 for (i = 0; i < 3; i++)
379 if (using_element (mv->type, i))
380 value_resize (&mv->values[i], mv->width, width);
381 else
383 value_destroy (&mv->values[i], mv->width);
384 value_init (&mv->values[i], width);
386 mv->width = width;
389 /* Returns true if D is a missing value in MV, false otherwise.
390 MV must be a set of numeric missing values. */
391 static bool
392 is_num_user_missing (const struct missing_values *mv, double d)
394 const union value *v = mv->values;
395 assert (mv->width == 0);
396 switch (mv->type)
398 case MVT_NONE:
399 return false;
400 case MVT_1:
401 return v[0].f == d;
402 case MVT_2:
403 return v[0].f == d || v[1].f == d;
404 case MVT_3:
405 return v[0].f == d || v[1].f == d || v[2].f == d;
406 case MVT_RANGE:
407 return v[1].f <= d && d <= v[2].f;
408 case MVT_RANGE_1:
409 return v[0].f == d || (v[1].f <= d && d <= v[2].f);
411 NOT_REACHED ();
414 /* Returns true if S[] is a missing value in MV, false otherwise.
415 MV must be a set of string missing values.
416 S[] must contain exactly as many characters as MV's width. */
417 static bool
418 is_str_user_missing (const struct missing_values *mv, const uint8_t s[])
420 const union value *v = mv->values;
421 assert (mv->width > 0);
422 switch (mv->type)
424 case MVT_NONE:
425 return false;
426 case MVT_1:
427 return !memcmp (v[0].s, s, mv->width);
428 case MVT_2:
429 return (!memcmp (v[0].s, s, mv->width)
430 || !memcmp (v[1].s, s, mv->width));
431 case MVT_3:
432 return (!memcmp (v[0].s, s, mv->width)
433 || !memcmp (v[1].s, s, mv->width)
434 || !memcmp (v[2].s, s, mv->width));
435 case MVT_RANGE:
436 case MVT_RANGE_1:
437 NOT_REACHED ();
439 NOT_REACHED ();
442 /* Returns MV_USER if V is a user-missing value in MV, MV_SYSTEM if V is
443 system-missing (and MV is numeric), or 0 if V is not missing. */
444 enum mv_class
445 mv_is_value_missing (const struct missing_values *mv, const union value *v)
447 return (mv->width == 0
448 ? mv_is_num_missing (mv, v->f)
449 : mv_is_str_missing (mv, v->s));
452 /* Returns MV_USER if V is a user-missing value in MV, MV_SYSTEM if V is
453 system-missing, or 0 if V is not missing. MV must be a set of numeric
454 missing values. */
455 enum mv_class
456 mv_is_num_missing (const struct missing_values *mv, double d)
458 assert (mv->width == 0);
459 return (d == SYSMIS ? MV_SYSTEM
460 : is_num_user_missing (mv, d) ? MV_USER
461 : 0);
464 /* Returns MV_USER if S[] is a user-missing value in MV, or 0 if V is not
465 missing. MV must be a set of string missing values. S[] must contain
466 exactly as many characters as MV's width. */
467 enum mv_class
468 mv_is_str_missing (const struct missing_values *mv, const uint8_t s[])
470 assert (mv->width > 0);
471 return is_str_user_missing (mv, s) ? MV_USER : 0;
474 /* Like mv_is_value_missing(), this tests whether V is a missing value in MV.
475 It supports the uncommon case where V and MV might have different widths:
476 the caller must specify VW, the width of V. MV and VW must be both numeric
477 or both string.
479 Comparison of strings of different width is done by conceptually extending
480 both strings to infinite width by appending spaces. */
481 enum mv_class
482 mv_is_value_missing_varwidth (const struct missing_values *mv,
483 const union value *v, int vw)
485 int mvw = mv->width;
486 if (mvw == vw)
487 return mv_is_value_missing (mv, v);
489 /* Make sure they're both strings. */
490 assert (mvw && vw);
491 if (mv->type == MVT_NONE)
492 return false;
494 for (int i = 0; i < mv->type; i++)
495 if (!buf_compare_rpad (CHAR_CAST_BUG (const char *, mv->values[i].s), mvw,
496 CHAR_CAST_BUG (const char *, v->s), vw))
497 return MV_USER;
498 return 0;
501 char *
502 mv_to_string (const struct missing_values *mv, const char *encoding)
504 struct string s = DS_EMPTY_INITIALIZER;
505 if (mv_has_range (mv))
507 double x, y;
508 mv_get_range (mv, &x, &y);
509 if (x == LOWEST)
510 ds_put_format (&s, "LOWEST THRU %.*g", DBL_DIG + 1, y);
511 else if (y == HIGHEST)
512 ds_put_format (&s, "%.*g THRU HIGHEST", DBL_DIG + 1, x);
513 else
514 ds_put_format (&s, "%.*g THRU %.*g",
515 DBL_DIG + 1, x,
516 DBL_DIG + 1, y);
518 for (size_t j = 0; j < mv_n_values (mv); j++)
520 const union value *value = mv_get_value (mv, j);
521 if (!ds_is_empty (&s))
522 ds_put_cstr (&s, "; ");
523 if (!mv->width)
524 ds_put_format (&s, "%.*g", DBL_DIG + 1, value->f);
525 else
527 char *mvs = recode_string (
528 "UTF-8", encoding, CHAR_CAST (char *, value->s),
529 MIN (mv->width, MV_MAX_STRING));
530 ds_put_format (&s, "\"%s\"", mvs);
531 free (mvs);
534 return ds_is_empty (&s) ? NULL : ds_steal_cstr (&s);