docs: Clarify error values for empty files when loading
[glib.git] / gvdb-reader.c
blobaa3154feb901f244fa184a66ac7198072c038c4f
1 /*
2 * Copyright © 2010 Codethink Limited
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 * Author: Ryan Lortie <desrt@desrt.ca>
20 #include "gvdb-reader.h"
21 #include "gvdb-format.h"
23 #include <string.h>
25 struct _GvdbTable {
26 GBytes *bytes;
28 const gchar *data;
29 gsize size;
31 gboolean byteswapped;
32 gboolean trusted;
34 const guint32_le *bloom_words;
35 guint32 n_bloom_words;
36 guint bloom_shift;
38 const guint32_le *hash_buckets;
39 guint32 n_buckets;
41 struct gvdb_hash_item *hash_items;
42 guint32 n_hash_items;
45 static const gchar *
46 gvdb_table_item_get_key (GvdbTable *file,
47 const struct gvdb_hash_item *item,
48 gsize *size)
50 guint32 start, end;
52 start = guint32_from_le (item->key_start);
53 *size = guint16_from_le (item->key_size);
54 end = start + *size;
56 if G_UNLIKELY (start > end || end > file->size)
57 return NULL;
59 return file->data + start;
62 static gconstpointer
63 gvdb_table_dereference (GvdbTable *file,
64 const struct gvdb_pointer *pointer,
65 gint alignment,
66 gsize *size)
68 guint32 start, end;
70 start = guint32_from_le (pointer->start);
71 end = guint32_from_le (pointer->end);
73 if G_UNLIKELY (start > end || end > file->size || start & (alignment - 1))
74 return NULL;
76 *size = end - start;
78 return file->data + start;
81 static void
82 gvdb_table_setup_root (GvdbTable *file,
83 const struct gvdb_pointer *pointer)
85 const struct gvdb_hash_header *header;
86 guint32 n_bloom_words;
87 guint32 n_buckets;
88 gsize size;
90 header = gvdb_table_dereference (file, pointer, 4, &size);
92 if G_UNLIKELY (header == NULL || size < sizeof *header)
93 return;
95 size -= sizeof *header;
97 n_bloom_words = guint32_from_le (header->n_bloom_words);
98 n_buckets = guint32_from_le (header->n_buckets);
99 n_bloom_words &= (1u << 27) - 1;
101 if G_UNLIKELY (n_bloom_words * sizeof (guint32_le) > size)
102 return;
104 file->bloom_words = (gpointer) (header + 1);
105 size -= n_bloom_words * sizeof (guint32_le);
106 file->n_bloom_words = n_bloom_words;
108 if G_UNLIKELY (n_buckets > G_MAXUINT / sizeof (guint32_le) ||
109 n_buckets * sizeof (guint32_le) > size)
110 return;
112 file->hash_buckets = file->bloom_words + file->n_bloom_words;
113 size -= n_buckets * sizeof (guint32_le);
114 file->n_buckets = n_buckets;
116 if G_UNLIKELY (size % sizeof (struct gvdb_hash_item))
117 return;
119 file->hash_items = (gpointer) (file->hash_buckets + n_buckets);
120 file->n_hash_items = size / sizeof (struct gvdb_hash_item);
124 * gvdb_table_new_from_bytes:
125 * @bytes: the #GBytes with the data
126 * @trusted: if the contents of @bytes are trusted
127 * @error: %NULL, or a pointer to a %NULL #GError
129 * Creates a new #GvdbTable from the contents of @bytes.
131 * This call can fail if the header contained in @bytes is invalid or if @bytes
132 * is empty; if so, %G_FILE_ERROR_INVAL will be returned.
134 * You should call gvdb_table_free() on the return result when you no
135 * longer require it.
137 * Returns: a new #GvdbTable
139 GvdbTable *
140 gvdb_table_new_from_bytes (GBytes *bytes,
141 gboolean trusted,
142 GError **error)
144 const struct gvdb_header *header;
145 GvdbTable *file;
147 file = g_slice_new0 (GvdbTable);
148 file->bytes = g_bytes_ref (bytes);
149 file->data = g_bytes_get_data (bytes, &file->size);
150 file->trusted = trusted;
152 if (file->size < sizeof (struct gvdb_header))
153 goto invalid;
155 header = (gpointer) file->data;
157 if (header->signature[0] == GVDB_SIGNATURE0 &&
158 header->signature[1] == GVDB_SIGNATURE1 &&
159 guint32_from_le (header->version) == 0)
160 file->byteswapped = FALSE;
162 else if (header->signature[0] == GVDB_SWAPPED_SIGNATURE0 &&
163 header->signature[1] == GVDB_SWAPPED_SIGNATURE1 &&
164 guint32_from_le (header->version) == 0)
165 file->byteswapped = TRUE;
167 else
168 goto invalid;
170 gvdb_table_setup_root (file, &header->root);
172 return file;
174 invalid:
175 g_set_error_literal (error, G_FILE_ERROR, G_FILE_ERROR_INVAL, "invalid gvdb header");
177 g_bytes_unref (file->bytes);
179 g_slice_free (GvdbTable, file);
181 return NULL;
185 * gvdb_table_new:
186 * @filename: a filename
187 * @trusted: if the contents of @bytes are trusted
188 * @error: %NULL, or a pointer to a %NULL #GError
190 * Creates a new #GvdbTable using the #GMappedFile for @filename as the
191 * #GBytes.
193 * This function will fail if the file cannot be opened.
194 * In that case, the #GError that is returned will be an error from
195 * g_mapped_file_new().
197 * An empty or corrupt file will result in %G_FILE_ERROR_INVAL.
199 * Returns: a new #GvdbTable
201 GvdbTable *
202 gvdb_table_new (const gchar *filename,
203 gboolean trusted,
204 GError **error)
206 GMappedFile *mapped;
207 GvdbTable *table;
208 GBytes *bytes;
210 mapped = g_mapped_file_new (filename, FALSE, error);
211 if (!mapped)
212 return NULL;
214 bytes = g_mapped_file_get_bytes (mapped);
215 table = gvdb_table_new_from_bytes (bytes, trusted, error);
216 g_mapped_file_unref (mapped);
217 g_bytes_unref (bytes);
219 g_prefix_error (error, "%s: ", filename);
221 return table;
224 static gboolean
225 gvdb_table_bloom_filter (GvdbTable *file,
226 guint32 hash_value)
228 guint32 word, mask;
230 if (file->n_bloom_words == 0)
231 return TRUE;
233 word = (hash_value / 32) % file->n_bloom_words;
234 mask = 1 << (hash_value & 31);
235 mask |= 1 << ((hash_value >> file->bloom_shift) & 31);
237 return (guint32_from_le (file->bloom_words[word]) & mask) == mask;
240 static gboolean
241 gvdb_table_check_name (GvdbTable *file,
242 struct gvdb_hash_item *item,
243 const gchar *key,
244 guint key_length)
246 const gchar *this_key;
247 gsize this_size;
248 guint32 parent;
250 this_key = gvdb_table_item_get_key (file, item, &this_size);
252 if G_UNLIKELY (this_key == NULL || this_size > key_length)
253 return FALSE;
255 key_length -= this_size;
257 if G_UNLIKELY (memcmp (this_key, key + key_length, this_size) != 0)
258 return FALSE;
260 parent = guint32_from_le (item->parent);
261 if (key_length == 0 && parent == 0xffffffffu)
262 return TRUE;
264 if G_LIKELY (parent < file->n_hash_items && this_size > 0)
265 return gvdb_table_check_name (file,
266 &file->hash_items[parent],
267 key, key_length);
269 return FALSE;
272 static const struct gvdb_hash_item *
273 gvdb_table_lookup (GvdbTable *file,
274 const gchar *key,
275 gchar type)
277 guint32 hash_value = 5381;
278 guint key_length;
279 guint32 bucket;
280 guint32 lastno;
281 guint32 itemno;
283 if G_UNLIKELY (file->n_buckets == 0 || file->n_hash_items == 0)
284 return NULL;
286 for (key_length = 0; key[key_length]; key_length++)
287 hash_value = (hash_value * 33) + ((signed char *) key)[key_length];
289 if (!gvdb_table_bloom_filter (file, hash_value))
290 return NULL;
292 bucket = hash_value % file->n_buckets;
293 itemno = guint32_from_le (file->hash_buckets[bucket]);
295 if (bucket == file->n_buckets - 1 ||
296 (lastno = guint32_from_le(file->hash_buckets[bucket + 1])) > file->n_hash_items)
297 lastno = file->n_hash_items;
299 while G_LIKELY (itemno < lastno)
301 struct gvdb_hash_item *item = &file->hash_items[itemno];
303 if (hash_value == guint32_from_le (item->hash_value))
304 if G_LIKELY (gvdb_table_check_name (file, item, key, key_length))
305 if G_LIKELY (item->type == type)
306 return item;
308 itemno++;
311 return NULL;
314 static gboolean
315 gvdb_table_list_from_item (GvdbTable *table,
316 const struct gvdb_hash_item *item,
317 const guint32_le **list,
318 guint *length)
320 gsize size;
322 *list = gvdb_table_dereference (table, &item->value.pointer, 4, &size);
324 if G_LIKELY (*list == NULL || size % 4)
325 return FALSE;
327 *length = size / 4;
329 return TRUE;
333 * gvdb_table_get_names:
334 * @table: a #GvdbTable
335 * @length: the number of items returned, or %NULL
337 * Gets a list of all names contained in @table.
339 * No call to gvdb_table_get_table(), gvdb_table_list() or
340 * gvdb_table_get_value() will succeed unless it is for one of the
341 * names returned by this function.
343 * Note that some names that are returned may still fail for all of the
344 * above calls in the case of the corrupted file. Note also that the
345 * returned strings may not be utf8.
347 * Returns: a %NULL-terminated list of strings, of length @length
349 gchar **
350 gvdb_table_get_names (GvdbTable *table,
351 gint *length)
353 gchar **names;
354 gint n_names;
355 gint filled;
356 gint total;
357 gint i;
359 /* We generally proceed by iterating over the list of items in the
360 * hash table (in order of appearance) recording them into an array.
362 * Each item has a parent item (except root items). The parent item
363 * forms part of the name of the item. We could go fetching the
364 * parent item chain at the point that we encounter each item but then
365 * we would need to implement some sort of recursion along with checks
366 * for self-referential items.
368 * Instead, we do a number of passes. Each pass will build up one
369 * level of names (starting from the root). We continue to do passes
370 * until no more items are left. The first pass will only add root
371 * items and each further pass will only add items whose direct parent
372 * is an item added in the immediately previous pass. It's also
373 * possible that items get filled if they follow their parent within a
374 * particular pass.
376 * At most we will have a number of passes equal to the depth of the
377 * tree. Self-referential items will never be filled in (since their
378 * parent will have never been filled in). We continue until we have
379 * a pass that fills in no additional items.
381 * This takes an O(n) algorithm and turns it into O(n*m) where m is
382 * the depth of the tree, but in all sane cases the tree won't be very
383 * deep and the constant factor of this algorithm is lower (and the
384 * complexity of coding it, as well).
387 n_names = table->n_hash_items;
388 names = g_new0 (gchar *, n_names + 1);
390 /* 'names' starts out all-NULL. On each pass we record the number
391 * of items changed from NULL to non-NULL in 'filled' so we know if we
392 * should repeat the loop. 'total' counts the total number of items
393 * filled. If 'total' ends up equal to 'n_names' then we know that
394 * 'names' has been completely filled.
397 total = 0;
400 /* Loop until we have filled no more entries */
401 filled = 0;
403 for (i = 0; i < n_names; i++)
405 const struct gvdb_hash_item *item = &table->hash_items[i];
406 const gchar *name;
407 gsize name_length;
408 guint32 parent;
410 /* already got it on a previous pass */
411 if (names[i] != NULL)
412 continue;
414 parent = guint32_from_le (item->parent);
416 if (parent == 0xffffffffu)
418 /* it's a root item */
419 name = gvdb_table_item_get_key (table, item, &name_length);
421 if (name != NULL)
423 names[i] = g_strndup (name, name_length);
424 filled++;
428 else if (parent < n_names && names[parent] != NULL)
430 /* It's a non-root item whose parent was filled in already.
432 * Calculate the name of this item by combining it with
433 * its parent name.
435 name = gvdb_table_item_get_key (table, item, &name_length);
437 if (name != NULL)
439 const gchar *parent_name = names[parent];
440 gsize parent_length;
441 gchar *fullname;
443 parent_length = strlen (parent_name);
444 fullname = g_malloc (parent_length + name_length + 1);
445 memcpy (fullname, parent_name, parent_length);
446 memcpy (fullname + parent_length, name, name_length);
447 fullname[parent_length + name_length] = '\0';
448 names[i] = fullname;
449 filled++;
454 total += filled;
456 while (filled && total < n_names);
458 /* If the table was corrupted then 'names' may have holes in it.
459 * Collapse those.
461 if G_UNLIKELY (total != n_names)
463 GPtrArray *fixed_names;
465 fixed_names = g_ptr_array_new ();
466 for (i = 0; i < n_names; i++)
467 if (names[i] != NULL)
468 g_ptr_array_add (fixed_names, names[i]);
470 g_free (names);
471 n_names = fixed_names->len;
472 g_ptr_array_add (fixed_names, NULL);
473 names = (gchar **) g_ptr_array_free (fixed_names, FALSE);
476 if (length)
477 *length = n_names;
479 return names;
483 * gvdb_table_list:
484 * @file: a #GvdbTable
485 * @key: a string
487 * List all of the keys that appear below @key. The nesting of keys
488 * within the hash file is defined by the program that created the hash
489 * file. One thing is constant: each item in the returned array can be
490 * concatenated to @key to obtain the full name of that key.
492 * It is not possible to tell from this function if a given key is
493 * itself a path, a value, or another hash table; you are expected to
494 * know this for yourself.
496 * You should call g_strfreev() on the return result when you no longer
497 * require it.
499 * Returns: a %NULL-terminated string array
501 gchar **
502 gvdb_table_list (GvdbTable *file,
503 const gchar *key)
505 const struct gvdb_hash_item *item;
506 const guint32_le *list;
507 gchar **strv;
508 guint length;
509 guint i;
511 if ((item = gvdb_table_lookup (file, key, 'L')) == NULL)
512 return NULL;
514 if (!gvdb_table_list_from_item (file, item, &list, &length))
515 return NULL;
517 strv = g_new (gchar *, length + 1);
518 for (i = 0; i < length; i++)
520 guint32 itemno = guint32_from_le (list[i]);
522 if (itemno < file->n_hash_items)
524 const struct gvdb_hash_item *item;
525 const gchar *string;
526 gsize strsize;
528 item = file->hash_items + itemno;
530 string = gvdb_table_item_get_key (file, item, &strsize);
532 if (string != NULL)
533 strv[i] = g_strndup (string, strsize);
534 else
535 strv[i] = g_malloc0 (1);
537 else
538 strv[i] = g_malloc0 (1);
541 strv[i] = NULL;
543 return strv;
547 * gvdb_table_has_value:
548 * @file: a #GvdbTable
549 * @key: a string
551 * Checks for a value named @key in @file.
553 * Note: this function does not consider non-value nodes (other hash
554 * tables, for example).
556 * Returns: %TRUE if @key is in the table
558 gboolean
559 gvdb_table_has_value (GvdbTable *file,
560 const gchar *key)
562 static const struct gvdb_hash_item *item;
563 gsize size;
565 item = gvdb_table_lookup (file, key, 'v');
567 if (item == NULL)
568 return FALSE;
570 return gvdb_table_dereference (file, &item->value.pointer, 8, &size) != NULL;
573 static GVariant *
574 gvdb_table_value_from_item (GvdbTable *table,
575 const struct gvdb_hash_item *item)
577 GVariant *variant, *value;
578 gconstpointer data;
579 GBytes *bytes;
580 gsize size;
582 data = gvdb_table_dereference (table, &item->value.pointer, 8, &size);
584 if G_UNLIKELY (data == NULL)
585 return NULL;
587 bytes = g_bytes_new_from_bytes (table->bytes, ((gchar *) data) - table->data, size);
588 variant = g_variant_new_from_bytes (G_VARIANT_TYPE_VARIANT, bytes, table->trusted);
589 value = g_variant_get_variant (variant);
590 g_variant_unref (variant);
591 g_bytes_unref (bytes);
593 return value;
597 * gvdb_table_get_value:
598 * @file: a #GvdbTable
599 * @key: a string
601 * Looks up a value named @key in @file.
603 * If the value is not found then %NULL is returned. Otherwise, a new
604 * #GVariant instance is returned. The #GVariant does not depend on the
605 * continued existence of @file.
607 * You should call g_variant_unref() on the return result when you no
608 * longer require it.
610 * Returns: a #GVariant, or %NULL
612 GVariant *
613 gvdb_table_get_value (GvdbTable *file,
614 const gchar *key)
616 const struct gvdb_hash_item *item;
617 GVariant *value;
619 if ((item = gvdb_table_lookup (file, key, 'v')) == NULL)
620 return NULL;
622 value = gvdb_table_value_from_item (file, item);
624 if (value && file->byteswapped)
626 GVariant *tmp;
628 tmp = g_variant_byteswap (value);
629 g_variant_unref (value);
630 value = tmp;
633 return value;
637 * gvdb_table_get_raw_value:
638 * @table: a #GvdbTable
639 * @key: a string
641 * Looks up a value named @key in @file.
643 * This call is equivalent to gvdb_table_get_value() except that it
644 * never byteswaps the value.
646 * Returns: a #GVariant, or %NULL
648 GVariant *
649 gvdb_table_get_raw_value (GvdbTable *table,
650 const gchar *key)
652 const struct gvdb_hash_item *item;
654 if ((item = gvdb_table_lookup (table, key, 'v')) == NULL)
655 return NULL;
657 return gvdb_table_value_from_item (table, item);
661 * gvdb_table_get_table:
662 * @file: a #GvdbTable
663 * @key: a string
665 * Looks up the hash table named @key in @file.
667 * The toplevel hash table in a #GvdbTable can contain reference to
668 * child hash tables (and those can contain further references...).
670 * If @key is not found in @file then %NULL is returned. Otherwise, a
671 * new #GvdbTable is returned, referring to the child hashtable as
672 * contained in the file. This newly-created #GvdbTable does not depend
673 * on the continued existence of @file.
675 * You should call gvdb_table_free() on the return result when you no
676 * longer require it.
678 * Returns: a new #GvdbTable, or %NULL
680 GvdbTable *
681 gvdb_table_get_table (GvdbTable *file,
682 const gchar *key)
684 const struct gvdb_hash_item *item;
685 GvdbTable *new;
687 item = gvdb_table_lookup (file, key, 'H');
689 if (item == NULL)
690 return NULL;
692 new = g_slice_new0 (GvdbTable);
693 new->bytes = g_bytes_ref (file->bytes);
694 new->byteswapped = file->byteswapped;
695 new->trusted = file->trusted;
696 new->data = file->data;
697 new->size = file->size;
699 gvdb_table_setup_root (new, &item->value.pointer);
701 return new;
705 * gvdb_table_free:
706 * @file: a #GvdbTable
708 * Frees @file.
710 void
711 gvdb_table_free (GvdbTable *file)
713 g_bytes_unref (file->bytes);
714 g_slice_free (GvdbTable, file);
718 * gvdb_table_is_valid:
719 * @table: a #GvdbTable
721 * Checks if the table is still valid.
723 * An on-disk GVDB can be marked as invalid. This happens when the file
724 * has been replaced. The appropriate action is typically to reopen the
725 * file.
727 * Returns: %TRUE if @table is still valid
729 gboolean
730 gvdb_table_is_valid (GvdbTable *table)
732 return !!*table->data;