psppire: Sort by name or label case-insensitively in dictionary view.
[pspp.git] / src / data / case.c
blob9f1c404193911651e9192277b42204bd3ca15d70
1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2004, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
17 #include <config.h>
19 #include "data/case.h"
21 #include <limits.h>
22 #include <stddef.h>
23 #include <stdlib.h>
25 #include "data/value.h"
26 #include "data/variable.h"
27 #include "libpspp/assertion.h"
28 #include "libpspp/str.h"
30 #include "gl/minmax.h"
31 #include "gl/xalloc.h"
/* Debugging aid: define DEBUG_CASEREFS as 1 to make cases be copied
   rather than reference counted.  A compile-time warning is issued
   whenever the flag is enabled, because cases are then never shared. */
#define DEBUG_CASEREFS 0

#if DEBUG_CASEREFS
#warning "Caseref debug enabled. CASES ARE NOT BEING SHARED!!"
#endif
/* Forward declarations of file-local helpers defined near the end of
   this file. */
static size_t case_size (const struct caseproto *);
static void assert_variable_matches_case (const struct ccase *,
                                          const struct variable *);
static void copy_forward (struct ccase *dst, size_t dst_idx,
                          const struct ccase *src, size_t src_idx,
                          size_t n_values);
static void copy_backward (struct ccase *dst, size_t dst_idx,
                           const struct ccase *src, size_t src_idx,
                           size_t n_values);
/* Allocates and returns a new case whose layout is described by
   PROTO.  The values in the new case have indeterminate contents
   until the caller writes them.

   Aborts through xalloc_die() if memory cannot be obtained, so the
   return value is never null.

   The caller retains ownership of PROTO. */
struct ccase *
case_create (const struct caseproto *proto)
{
  struct ccase *new_case = case_try_create (proto);
  if (!new_case)
    xalloc_die ();
  return new_case;
}
65 /* Like case_create, but returns a null pointer if not enough
66 memory is available. */
67 struct ccase *
68 case_try_create (const struct caseproto *proto)
70 struct ccase *c = malloc (case_size (proto));
71 if (c != NULL)
73 if (caseproto_try_init_values (proto, c->values))
75 c->proto = caseproto_ref (proto);
76 c->ref_cnt = 1;
77 return c;
79 free (c);
81 return NULL;
/* Returns a new, unshared copy of case C.  This is done by taking an
   extra reference to C and then unsharing that reference. */
struct ccase *
case_clone (const struct ccase *c)
{
  struct ccase *extra_ref = case_ref (c);
  return case_unshare (extra_ref);
}
91 /* Increments case C's reference count and returns C. Afterward,
92 case C is shared among its reference count holders. */
93 struct ccase *
94 case_ref (const struct ccase *c_)
96 struct ccase *c = CONST_CAST (struct ccase *, c_);
97 c->ref_cnt++;
98 #if DEBUG_CASEREFS
99 c = case_unshare__ (c);
100 #endif
101 return c;
104 /* Returns an estimate of the number of bytes of memory that
105 would be consumed in creating a case based on PROTO. The
106 estimate includes typical overhead from malloc() in addition
107 to the actual size of data. */
108 size_t
109 case_get_cost (const struct caseproto *proto)
111 /* FIXME: improve approximation? */
112 return (1 + caseproto_get_n_widths (proto)
113 + 3 * caseproto_get_n_strings (proto)) * sizeof (union value);
116 /* Changes the prototype for case C, which must not be shared.
117 The new PROTO must be conformable with C's current prototype
118 (as defined by caseproto_is_conformable).
120 Any new values created by this function have indeterminate
121 content that the caller is responsible for initializing.
123 The caller retains ownership of PROTO.
125 Returns a new case that replaces C, which is freed. */
126 struct ccase *
127 case_resize (struct ccase *c, const struct caseproto *new_proto)
129 struct caseproto *old_proto = c->proto;
130 size_t old_n_widths = caseproto_get_n_widths (old_proto);
131 size_t new_n_widths = caseproto_get_n_widths (new_proto);
133 assert (!case_is_shared (c));
134 expensive_assert (caseproto_is_conformable (old_proto, new_proto));
136 if (old_n_widths != new_n_widths)
138 if (new_n_widths < old_n_widths)
139 caseproto_reinit_values (old_proto, new_proto, c->values);
140 c = xrealloc (c, case_size (new_proto));
141 if (new_n_widths > old_n_widths)
142 caseproto_reinit_values (old_proto, new_proto, c->values);
144 caseproto_unref (old_proto);
145 c->proto = caseproto_ref (new_proto);
148 return c;
151 /* case_unshare_and_resize(C, PROTO) is equivalent to
152 case_resize(case_unshare(C), PROTO), but it is faster if case
153 C is shared.
155 Any new values created by this function have indeterminate
156 content that the caller is responsible for initializing.
158 The caller retains ownership of PROTO.
160 Returns the new case that replaces C, which is freed. */
161 struct ccase *
162 case_unshare_and_resize (struct ccase *c, const struct caseproto *proto)
164 if (!case_is_shared (c))
165 return case_resize (c, proto);
166 else
168 struct ccase *new = case_create (proto);
169 size_t old_n_values = caseproto_get_n_widths (c->proto);
170 size_t new_n_values = caseproto_get_n_widths (proto);
171 case_copy (new, 0, c, 0, MIN (old_n_values, new_n_values));
172 c->ref_cnt--;
173 return new;
177 /* Sets all of the numeric values in case C to the system-missing
178 value, and all of the string values to spaces. */
179 void
180 case_set_missing (struct ccase *c)
182 size_t i;
184 assert (!case_is_shared (c));
185 for (i = 0; i < caseproto_get_n_widths (c->proto); i++)
186 value_set_missing (&c->values[i], caseproto_get_width (c->proto, i));
189 /* Copies N_VALUES values from SRC (starting at SRC_IDX) to DST
190 (starting at DST_IDX). Each value that is copied into must
191 have the same width as the value that it is copied from.
193 Properly handles overlapping ranges when DST == SRC.
195 DST must not be shared. */
196 void
197 case_copy (struct ccase *dst, size_t dst_idx,
198 const struct ccase *src, size_t src_idx,
199 size_t n_values)
201 assert (!case_is_shared (dst));
202 assert (caseproto_range_is_valid (dst->proto, dst_idx, n_values));
203 assert (caseproto_range_is_valid (src->proto, src_idx, n_values));
204 assert (caseproto_range_equal (dst->proto, dst_idx, src->proto, src_idx,
205 n_values));
207 if (dst != src)
209 if (!dst->proto->n_strings || !src->proto->n_strings)
210 memcpy (&dst->values[dst_idx], &src->values[src_idx],
211 sizeof dst->values[0] * n_values);
212 else
213 copy_forward (dst, dst_idx, src, src_idx, n_values);
215 else if (dst_idx != src_idx)
217 if (!dst->proto->n_strings)
218 memmove (&dst->values[dst_idx], &src->values[src_idx],
219 sizeof dst->values[0] * n_values);
220 else if (dst_idx < src_idx)
221 copy_forward (dst, dst_idx, src, src_idx, n_values);
222 else /* dst_idx > src_idx */
223 copy_backward (dst, dst_idx, src, src_idx, n_values);
227 /* Copies N_VALUES values out of case C to VALUES, starting at
228 the given START_IDX. */
229 void
230 case_copy_out (const struct ccase *c,
231 size_t start_idx, union value *values, size_t n_values)
233 size_t i;
235 assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
237 for (i = 0; i < n_values; i++)
238 value_copy (&values[i], &c->values[start_idx + i],
239 caseproto_get_width (c->proto, start_idx + i));
242 /* Copies N_VALUES values from VALUES into case C, starting at
243 the given START_IDX.
245 C must not be shared. */
246 void
247 case_copy_in (struct ccase *c,
248 size_t start_idx, const union value *values, size_t n_values)
250 size_t i;
252 assert (!case_is_shared (c));
253 assert (caseproto_range_is_valid (c->proto, start_idx, n_values));
255 for (i = 0; i < n_values; i++)
256 value_copy (&c->values[start_idx + i], &values[i],
257 caseproto_get_width (c->proto, start_idx + i));
260 /* Returns a pointer to the `union value' used for the
261 element of C for variable V.
262 Case C must be drawn from V's dictionary.
263 The caller must not modify the returned data. */
264 const union value *
265 case_data (const struct ccase *c, const struct variable *v)
267 assert_variable_matches_case (c, v);
268 return &c->values[var_get_dict_index (v)];
271 /* Returns a pointer to the `union value' used for the element of
272 C numbered IDX. The caller must not modify the returned
273 data. */
274 const union value *
275 case_data_idx (const struct ccase *c, size_t idx)
277 assert (idx < c->proto->n_widths);
278 return &c->values[idx];
281 /* Returns a pointer to the `union value' used for the element of
282 C for variable V. Case C must be drawn from V's dictionary.
283 The caller is allowed to modify the returned data.
285 Case C must not be shared. */
286 union value *
287 case_data_rw (struct ccase *c, const struct variable *v)
289 assert_variable_matches_case (c, v);
290 assert (!case_is_shared (c));
291 return &c->values[var_get_dict_index (v)];
294 /* Returns a pointer to the `union value' used for the
295 element of C numbered IDX.
296 The caller is allowed to modify the returned data.
298 Case C must not be shared. */
299 union value *
300 case_data_rw_idx (struct ccase *c, size_t idx)
302 assert (idx < c->proto->n_widths);
303 assert (!case_is_shared (c));
304 return &c->values[idx];
307 /* Returns the numeric value of the `union value' in C for
308 variable V.
309 Case C must be drawn from V's dictionary. */
310 double
311 case_num (const struct ccase *c, const struct variable *v)
313 assert_variable_matches_case (c, v);
314 return c->values[var_get_dict_index (v)].f;
317 /* Returns the numeric value of the `union value' in C numbered
318 IDX. */
319 double
320 case_num_idx (const struct ccase *c, size_t idx)
322 assert (idx < c->proto->n_widths);
323 return c->values[idx].f;
326 /* Returns a pointer to the `double' in the `union value' in C for variable V.
327 The caller is allowed to modify the returned data.
329 Case C must be drawn from V's dictionary and must not be shared. */
330 double *
331 case_num_rw (struct ccase *c, const struct variable *v)
333 assert_variable_matches_case (c, v);
334 assert (!case_is_shared (c));
335 return &c->values[var_get_dict_index (v)].f;
338 /* Returns a pointer to the `double' in the `union value' in C numbered IDX.
339 The caller is allowed to modify the returned data.
341 Case C must not be shared. */
342 double *
343 case_num_rw_idx (struct ccase *c, size_t idx)
345 assert (!case_is_shared (c));
346 return &c->values[idx].f;
349 /* Returns the string value of the `union value' in C for
350 variable V. Case C must be drawn from V's dictionary. The
351 caller must not modify the return value.
353 Like the strings embedded in all "union value"s, the return
354 value is not null-terminated. */
355 const uint8_t *
356 case_str (const struct ccase *c, const struct variable *v)
358 assert_variable_matches_case (c, v);
359 return c->values[var_get_dict_index (v)].s;
362 /* Returns the string value of the `union value' in C numbered
363 IDX. The caller must not modify the return value.
365 Like the strings embedded in all "union value"s, the return
366 value is not null-terminated. */
367 const uint8_t *
368 case_str_idx (const struct ccase *c, size_t idx)
370 assert (idx < c->proto->n_widths);
371 return c->values[idx].s;
374 /* Returns a substring for the `union value' in C for variable V. Case C must
375 be drawn from V's dictionary. */
376 struct substring
377 case_ss (const struct ccase *c, const struct variable *v)
379 assert_variable_matches_case (c, v);
380 return ss_buffer (CHAR_CAST (char *, c->values[var_get_dict_index (v)].s),
381 var_get_width (v));
384 /* Returns a substring for the `union value' in C numbered IDX. WIDTH must be
385 the value's width. */
386 struct substring
387 case_ss_idx (const struct ccase *c, size_t width, size_t idx)
389 assert (width > 0);
390 assert (idx < c->proto->n_widths);
391 return ss_buffer (CHAR_CAST (char *, c->values[idx].s), width);
394 /* Returns the string value of the `union value' in C for
395 variable V. Case C must be drawn from V's dictionary. The
396 caller may modify the return value.
398 Case C must not be shared.
400 Like the strings embedded in all "union value"s, the return
401 value is not null-terminated. */
402 uint8_t *
403 case_str_rw (struct ccase *c, const struct variable *v)
405 assert_variable_matches_case (c, v);
406 size_t idx = var_get_dict_index (v);
407 assert (!case_is_shared (c));
408 return c->values[idx].s;
411 /* Returns the string value of the `union value' in C numbered
412 IDX. The caller may modify the return value.
414 Case C must not be shared.
416 Like the strings embedded in all "union value"s, the return
417 value is not null-terminated. */
418 uint8_t *
419 case_str_rw_idx (struct ccase *c, size_t idx)
421 assert (idx < c->proto->n_widths);
422 assert (!case_is_shared (c));
423 return c->values[idx].s;
/* Compares the values of the N_VARS variables in VP
   in cases A and B and returns a strcmp()-type result:
   negative, zero, or positive.

   This simply delegates to case_compare_2dict(), using VP as the
   variable list for both cases. */
int
case_compare (const struct ccase *a, const struct ccase *b,
              const struct variable *const *vp, size_t n_vars)
{
  return case_compare_2dict (a, b, vp, vp, n_vars);
}
/* Compares the values of the N_VARS variables in VAP in case CA
   to the values of the N_VARS variables in VBP in CB
   and returns a strcmp()-type result.

   Variables are compared pairwise in order, and the result of the
   first pair that differs is returned; zero means every pair compared
   equal (or N_VARS is zero).  Corresponding variables must have the
   same width. */
int
case_compare_2dict (const struct ccase *ca, const struct ccase *cb,
                    const struct variable *const *vap,
                    const struct variable *const *vbp,
                    size_t n_vars)
{
  int cmp = 0;
  for (; !cmp && n_vars-- > 0; vap++, vbp++)
    {
      const union value *va = case_data (ca, *vap);
      const union value *vb = case_data (cb, *vbp);
      assert (var_get_width (*vap) == var_get_width (*vbp));
      cmp = value_compare_3way (va, vb, var_get_width (*vap));
    }
  return cmp;
}
455 /* Returns a pointer to the array of `union value's used for C.
456 The caller must *not* modify the returned data.
458 This function breaks the case abstraction. It should *not* be
459 commonly used. Prefer the other case functions. */
460 const union value *
461 case_data_all (const struct ccase *c)
463 return c->values;
466 /* Returns a pointer to the array of `union value's used for C.
467 The caller is allowed to modify the returned data.
469 Case C must not be shared.
471 This function breaks the case abstraction. It should *not* be
472 commonly used. Prefer the other case functions. */
473 union value *
474 case_data_all_rw (struct ccase *c)
476 assert (!case_is_shared (c));
477 return c->values;
480 /* Internal helper function for case_unshare. */
481 struct ccase *
482 case_unshare__ (struct ccase *old)
484 struct ccase *new = case_create (old->proto);
485 case_copy (new, 0, old, 0, caseproto_get_n_widths (new->proto));
486 --old->ref_cnt;
487 return new;
490 /* Internal helper function for case_unref. */
491 void
492 case_unref__ (struct ccase *c)
494 caseproto_destroy_values (c->proto, c->values);
495 caseproto_unref (c->proto);
496 free (c);
499 /* Returns the number of bytes needed by a case for case
500 prototype PROTO. */
501 static size_t
502 case_size (const struct caseproto *proto)
504 return (offsetof (struct ccase, values)
505 + caseproto_get_n_widths (proto) * sizeof (union value));
508 /* Returns true if C contains a value at V's case index with the
509 same width as V; that is, if V may plausibly be used to read
510 or write data in C.
512 Useful in assertions. */
513 static void
514 assert_variable_matches_case (const struct ccase *c, const struct variable *v)
516 size_t var_idx = var_get_dict_index (v);
517 assert (var_idx < caseproto_get_n_widths (c->proto));
518 assert (caseproto_get_width (c->proto, var_idx) == var_get_width (v));
521 /* Internal helper function for case_copy(). */
522 static void
523 copy_forward (struct ccase *dst, size_t dst_idx,
524 const struct ccase *src, size_t src_idx,
525 size_t n_values)
527 size_t i;
529 for (i = 0; i < n_values; i++)
530 value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
531 caseproto_get_width (dst->proto, dst_idx + i));
534 /* Internal helper function for case_copy(). */
535 static void
536 copy_backward (struct ccase *dst, size_t dst_idx,
537 const struct ccase *src, size_t src_idx,
538 size_t n_values)
540 size_t i;
542 for (i = n_values; i-- != 0;)
543 value_copy (&dst->values[dst_idx + i], &src->values[src_idx + i],
544 caseproto_get_width (dst->proto, dst_idx + i));