r4895: New MIME type for Jar archives.
[rox-filer.git] / ROX-Filer / src / xdgmimecache.c
blobbfa02862b559cbd243bffe50b46966432d5f27ee
1 /* -*- mode: C; c-file-style: "gnu" -*- */
2 /* xdgmimecache.c: Private file. mmappable caches for mime data
4 * More info can be found at http://www.freedesktop.org/standards/
6 * Copyright (C) 2005 Matthias Clasen <mclasen@redhat.com>
8 * Licensed under the Academic Free License version 2.0
9 * Or under the following terms:
11 * This library is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
16 * This library is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with this library; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
27 #ifdef HAVE_CONFIG_H
28 #include <config.h>
29 #endif
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
35 #include <fcntl.h>
36 #include <unistd.h>
37 #include <fnmatch.h>
38 #include <assert.h>
40 #include <netinet/in.h> /* for ntohl/ntohs */
42 #ifdef HAVE_MMAP
43 #include <sys/mman.h>
44 #endif
46 #include <sys/stat.h>
47 #include <sys/types.h>
49 #include "xdgmimecache.h"
50 #include "xdgmimeint.h"
52 #ifndef MAX
53 #define MAX(a,b) ((a) > (b) ? (a) : (b))
54 #endif
56 #ifndef FALSE
57 #define FALSE (0)
58 #endif
60 #ifndef TRUE
61 #define TRUE (!FALSE)
62 #endif
64 #ifndef _O_BINARY
65 #define _O_BINARY 0
66 #endif
68 #define MAJOR_VERSION 1
69 #define MINOR_VERSION 0
71 extern XdgMimeCache **caches;
72 extern int n_caches;
/* One loaded cache file: the mapped bytes plus a reference count. */
struct _XdgMimeCache
{
  /* Number of outstanding references; the cache is released at zero. */
  int ref_count;

  /* Length in bytes of the region BUFFER points to. */
  size_t size;
  /* Start of the cache contents. */
  char *buffer;
};
/* Read the big-endian 16-bit value stored OFFSET bytes into BASE and
 * return it in host byte order.  The bytes are copied out with memcpy()
 * instead of dereferencing a cast pointer: offsets are themselves read
 * from the cache contents, so a damaged file could otherwise cause a
 * misaligned load. */
static uint16_t
xdg_cache_read_uint16 (const char *base, size_t offset)
{
  uint16_t raw;

  memcpy (&raw, base + offset, sizeof raw);
  return ntohs (raw);
}

/* 32-bit counterpart of xdg_cache_read_uint16(). */
static uint32_t
xdg_cache_read_uint32 (const char *base, size_t offset)
{
  uint32_t raw;

  memcpy (&raw, base + offset, sizeof raw);
  return ntohl (raw);
}

/* CACHE is a pointer to the first byte of the cache data, OFFSET a byte
 * offset into it. */
#define GET_UINT16(cache,offset) (xdg_cache_read_uint16 ((cache), (offset)))
#define GET_UINT32(cache,offset) (xdg_cache_read_uint32 ((cache), (offset)))
85 XdgMimeCache *
86 _xdg_mime_cache_ref (XdgMimeCache *cache)
88 cache->ref_count++;
89 return cache;
92 void
93 _xdg_mime_cache_unref (XdgMimeCache *cache)
95 cache->ref_count--;
97 if (cache->ref_count == 0)
99 #ifdef HAVE_MMAP
100 munmap (cache->buffer, cache->size);
101 #endif
102 free (cache);
106 XdgMimeCache *
107 _xdg_mime_cache_new_from_file (const char *file_name)
109 XdgMimeCache *cache = NULL;
111 #ifdef HAVE_MMAP
112 int fd = -1;
113 struct stat st;
114 char *buffer = NULL;
116 /* Open the file and map it into memory */
117 fd = open (file_name, O_RDONLY|_O_BINARY, 0);
119 if (fd < 0)
120 return NULL;
122 if (fstat (fd, &st) < 0 || st.st_size < 4)
123 goto done;
125 buffer = (char *) mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
127 if (buffer == MAP_FAILED)
128 goto done;
130 /* Verify version */
131 if (GET_UINT16 (buffer, 0) != MAJOR_VERSION ||
132 GET_UINT16 (buffer, 2) != MINOR_VERSION)
134 munmap (buffer, st.st_size);
136 goto done;
139 cache = (XdgMimeCache *) malloc (sizeof (XdgMimeCache));
140 cache->ref_count = 1;
141 cache->buffer = buffer;
142 cache->size = st.st_size;
144 done:
145 if (fd != -1)
146 close (fd);
148 #endif /* HAVE_MMAP */
150 return cache;
153 static int
154 cache_magic_matchlet_compare_to_data (XdgMimeCache *cache,
155 xdg_uint32_t offset,
156 const void *data,
157 size_t len)
159 xdg_uint32_t range_start = GET_UINT32 (cache->buffer, offset);
160 xdg_uint32_t range_length = GET_UINT32 (cache->buffer, offset + 4);
161 xdg_uint32_t data_length = GET_UINT32 (cache->buffer, offset + 12);
162 xdg_uint32_t data_offset = GET_UINT32 (cache->buffer, offset + 16);
163 xdg_uint32_t mask_offset = GET_UINT32 (cache->buffer, offset + 20);
165 int i, j;
167 for (i = range_start; i <= range_start + range_length; i++)
169 int valid_matchlet = TRUE;
171 if (i + data_length > len)
172 return FALSE;
174 if (mask_offset)
176 for (j = 0; j < data_length; j++)
178 if ((cache->buffer[data_offset + j] & cache->buffer[mask_offset + j]) !=
179 ((((unsigned char *) data)[j + i]) & cache->buffer[mask_offset + j]))
181 valid_matchlet = FALSE;
182 break;
186 else
188 for (j = 0; j < data_length; j++)
190 if (cache->buffer[data_offset + j] != ((unsigned char *) data)[j + i])
192 valid_matchlet = FALSE;
193 break;
198 if (valid_matchlet)
199 return TRUE;
202 return FALSE;
205 static int
206 cache_magic_matchlet_compare (XdgMimeCache *cache,
207 xdg_uint32_t offset,
208 const void *data,
209 size_t len)
211 xdg_uint32_t n_children = GET_UINT32 (cache->buffer, offset + 24);
212 xdg_uint32_t child_offset = GET_UINT32 (cache->buffer, offset + 28);
214 int i;
216 if (cache_magic_matchlet_compare_to_data (cache, offset, data, len))
218 if (n_children == 0)
219 return TRUE;
221 for (i = 0; i < n_children; i++)
223 if (cache_magic_matchlet_compare (cache, child_offset + 32 * i,
224 data, len))
225 return TRUE;
229 return FALSE;
232 static const char *
233 cache_magic_compare_to_data (XdgMimeCache *cache,
234 xdg_uint32_t offset,
235 const void *data,
236 size_t len,
237 int *prio)
239 xdg_uint32_t priority = GET_UINT32 (cache->buffer, offset);
240 xdg_uint32_t mimetype_offset = GET_UINT32 (cache->buffer, offset + 4);
241 xdg_uint32_t n_matchlets = GET_UINT32 (cache->buffer, offset + 8);
242 xdg_uint32_t matchlet_offset = GET_UINT32 (cache->buffer, offset + 12);
244 int i;
246 for (i = 0; i < n_matchlets; i++)
248 if (cache_magic_matchlet_compare (cache, matchlet_offset + i * 32,
249 data, len))
251 *prio = priority;
253 return cache->buffer + mimetype_offset;
257 return NULL;
260 static const char *
261 cache_magic_lookup_data (XdgMimeCache *cache,
262 const void *data,
263 size_t len,
264 int *prio)
266 xdg_uint32_t list_offset;
267 xdg_uint32_t n_entries;
268 xdg_uint32_t offset;
270 int j;
272 *prio = 0;
274 list_offset = GET_UINT32 (cache->buffer, 24);
275 n_entries = GET_UINT32 (cache->buffer, list_offset);
276 offset = GET_UINT32 (cache->buffer, list_offset + 8);
278 for (j = 0; j < n_entries; j++)
280 const char *match = cache_magic_compare_to_data (cache, offset + 16 * j,
281 data, len, prio);
282 if (match)
283 return match;
286 return NULL;
289 static const char *
290 cache_alias_lookup (const char *alias)
292 const char *ptr;
293 int i, min, max, mid, cmp;
295 for (i = 0; i < n_caches; i++)
297 XdgMimeCache *cache = caches[i];
298 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 4 );
299 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
300 xdg_uint32_t offset;
302 min = 0;
303 max = n_entries - 1;
304 while (max >= min)
306 mid = (min + max) / 2;
308 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid);
309 ptr = cache->buffer + offset;
310 cmp = strcmp (ptr, alias);
312 if (cmp < 0)
313 min = mid + 1;
314 else if (cmp > 0)
315 max = mid - 1;
316 else
318 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid + 4);
319 return cache->buffer + offset;
324 return NULL;
327 static const char *
328 cache_glob_lookup_literal (const char *file_name)
330 const char *ptr;
331 int i, min, max, mid, cmp;
333 for (i = 0; i < n_caches; i++)
335 XdgMimeCache *cache = caches[i];
336 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 12);
337 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
338 xdg_uint32_t offset;
340 min = 0;
341 max = n_entries - 1;
342 while (max >= min)
344 mid = (min + max) / 2;
346 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid);
347 ptr = cache->buffer + offset;
348 cmp = strcmp (ptr, file_name);
350 if (cmp < 0)
351 min = mid + 1;
352 else if (cmp > 0)
353 max = mid - 1;
354 else
356 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * mid + 4);
357 return cache->buffer + offset;
362 return NULL;
365 static const char *
366 cache_glob_lookup_fnmatch (const char *file_name)
368 const char *mime_type;
369 const char *ptr;
371 int i, j;
373 for (i = 0; i < n_caches; i++)
375 XdgMimeCache *cache = caches[i];
377 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 20);
378 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
380 for (j = 0; j < n_entries; j++)
382 xdg_uint32_t offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * j);
383 xdg_uint32_t mimetype_offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * j + 4);
384 ptr = cache->buffer + offset;
385 mime_type = cache->buffer + mimetype_offset;
387 /* FIXME: Not UTF-8 safe */
388 if (fnmatch (ptr, file_name, 0) == 0)
389 return mime_type;
393 return NULL;
396 static const char *
397 cache_glob_node_lookup_suffix (XdgMimeCache *cache,
398 xdg_uint32_t n_entries,
399 xdg_uint32_t offset,
400 const char *suffix,
401 int ignore_case)
403 xdg_unichar_t character;
404 xdg_unichar_t match_char;
405 xdg_uint32_t mimetype_offset;
406 xdg_uint32_t n_children;
407 xdg_uint32_t child_offset;
409 int min, max, mid;
411 character = _xdg_utf8_to_ucs4 (suffix);
412 if (ignore_case)
413 character = _xdg_ucs4_to_lower (character);
415 min = 0;
416 max = n_entries - 1;
417 while (max >= min)
419 mid = (min + max) / 2;
421 match_char = GET_UINT32 (cache->buffer, offset + 16 * mid);
423 if (match_char < character)
424 min = mid + 1;
425 else if (match_char > character)
426 max = mid - 1;
427 else
429 suffix = _xdg_utf8_next_char (suffix);
430 if (*suffix == '\0')
432 mimetype_offset = GET_UINT32 (cache->buffer, offset + 16 * mid + 4);
434 return cache->buffer + mimetype_offset;
436 else
438 n_children = GET_UINT32 (cache->buffer, offset + 16 * mid + 8);
439 child_offset = GET_UINT32 (cache->buffer, offset + 16 * mid + 12);
441 return cache_glob_node_lookup_suffix (cache,
442 n_children, child_offset,
443 suffix, ignore_case);
448 return NULL;
451 static const char *
452 cache_glob_lookup_suffix (const char *suffix,
453 int ignore_case)
455 const char *mime_type;
457 int i;
459 for (i = 0; i < n_caches; i++)
461 XdgMimeCache *cache = caches[i];
463 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 16);
464 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
465 xdg_uint32_t offset = GET_UINT32 (cache->buffer, list_offset + 4);
467 mime_type = cache_glob_node_lookup_suffix (cache,
468 n_entries, offset,
469 suffix, ignore_case);
470 if (mime_type)
471 return mime_type;
474 return NULL;
477 static void
478 find_stopchars (char *stopchars)
480 int i, j, k, l;
482 k = 0;
483 for (i = 0; i < n_caches; i++)
485 XdgMimeCache *cache = caches[i];
487 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 16);
488 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
489 xdg_uint32_t offset = GET_UINT32 (cache->buffer, list_offset + 4);
491 for (j = 0; j < n_entries; j++)
493 xdg_uint32_t match_char = GET_UINT32 (cache->buffer, offset);
495 if (match_char < 128)
497 for (l = 0; l < k; l++)
498 if (stopchars[l] == match_char)
499 break;
500 if (l == k)
502 stopchars[k] = (char) match_char;
503 k++;
507 offset += 16;
511 stopchars[k] = '\0';
514 static const char *
515 cache_glob_lookup_file_name (const char *file_name)
517 const char *mime_type;
518 const char *ptr;
519 char stopchars[128];
521 assert (file_name != NULL);
523 /* First, check the literals */
524 mime_type = cache_glob_lookup_literal (file_name);
525 if (mime_type)
526 return mime_type;
528 find_stopchars (stopchars);
530 /* Next, check suffixes */
531 ptr = strpbrk (file_name, stopchars);
532 while (ptr)
534 mime_type = cache_glob_lookup_suffix (ptr, FALSE);
535 if (mime_type != NULL)
536 return mime_type;
538 mime_type = cache_glob_lookup_suffix (ptr, TRUE);
539 if (mime_type != NULL)
540 return mime_type;
542 ptr = strpbrk (ptr + 1, stopchars);
545 /* Last, try fnmatch */
546 return cache_glob_lookup_fnmatch (file_name);
550 _xdg_mime_cache_get_max_buffer_extents (void)
552 xdg_uint32_t offset;
553 xdg_uint32_t max_extent;
554 int i;
556 max_extent = 0;
557 for (i = 0; i < n_caches; i++)
559 XdgMimeCache *cache = caches[i];
561 offset = GET_UINT32 (cache->buffer, 24);
562 max_extent = MAX (max_extent, GET_UINT32 (cache->buffer, offset + 4));
565 return max_extent;
568 const char *
569 _xdg_mime_cache_get_mime_type_for_data (const void *data,
570 size_t len)
572 const char *mime_type;
573 int i, priority;
575 priority = 0;
576 mime_type = NULL;
577 for (i = 0; i < n_caches; i++)
579 XdgMimeCache *cache = caches[i];
581 int prio;
582 const char *match;
584 match = cache_magic_lookup_data (cache, data, len, &prio);
585 if (prio > priority)
587 priority = prio;
588 mime_type = match;
592 if (priority > 0)
593 return mime_type;
595 return XDG_MIME_TYPE_UNKNOWN;
598 const char *
599 _xdg_mime_cache_get_mime_type_for_file (const char *file_name)
601 const char *mime_type;
602 FILE *file;
603 unsigned char *data;
604 int max_extent;
605 int bytes_read;
606 struct stat statbuf;
607 const char *base_name;
609 if (file_name == NULL)
610 return NULL;
612 if (! _xdg_utf8_validate (file_name))
613 return NULL;
615 base_name = _xdg_get_base_name (file_name);
616 mime_type = _xdg_mime_cache_get_mime_type_from_file_name (base_name);
618 if (mime_type != XDG_MIME_TYPE_UNKNOWN)
619 return mime_type;
621 if (stat (file_name, &statbuf) != 0)
622 return XDG_MIME_TYPE_UNKNOWN;
624 if (!S_ISREG (statbuf.st_mode))
625 return XDG_MIME_TYPE_UNKNOWN;
627 /* FIXME: Need to make sure that max_extent isn't totally broken. This could
628 * be large and need getting from a stream instead of just reading it all
629 * in. */
630 max_extent = _xdg_mime_cache_get_max_buffer_extents ();
631 data = malloc (max_extent);
632 if (data == NULL)
633 return XDG_MIME_TYPE_UNKNOWN;
635 file = fopen (file_name, "r");
636 if (file == NULL)
638 free (data);
639 return XDG_MIME_TYPE_UNKNOWN;
642 bytes_read = fread (data, 1, max_extent, file);
643 if (ferror (file))
645 free (data);
646 fclose (file);
647 return XDG_MIME_TYPE_UNKNOWN;
650 mime_type = _xdg_mime_cache_get_mime_type_for_data (data, bytes_read);
652 free (data);
653 fclose (file);
655 return mime_type;
658 const char *
659 _xdg_mime_cache_get_mime_type_from_file_name (const char *file_name)
661 const char *mime_type;
663 mime_type = cache_glob_lookup_file_name (file_name);
665 if (mime_type)
666 return mime_type;
667 else
668 return XDG_MIME_TYPE_UNKNOWN;
#if 1
/* Return 1 when MIME ends in a slash followed by an asterisk, i.e. it
 * names a whole media type rather than one subtype, and 0 otherwise. */
static int
is_super_type (const char *mime)
{
  size_t length = strlen (mime);

  /* A string shorter than the two-character suffix cannot match; testing
   * the length first keeps the suffix pointer inside the string. */
  if (length < 2)
    return 0;

  if (strcmp (mime + (length - 2), "/*") == 0)
    return 1;

  return 0;
}
#endif
689 _xdg_mime_cache_mime_type_subclass (const char *mime,
690 const char *base)
692 const char *umime, *ubase;
694 int i, j, min, max, med, cmp;
696 umime = _xdg_mime_cache_unalias_mime_type (mime);
697 ubase = _xdg_mime_cache_unalias_mime_type (base);
699 if (strcmp (umime, ubase) == 0)
700 return 1;
702 /* We really want to handle text/ * in GtkFileFilter, so we just
703 * turn on the supertype matching
705 #if 1
706 /* Handle supertypes */
707 if (is_super_type (ubase) &&
708 xdg_mime_media_type_equal (umime, ubase))
709 return 1;
710 #endif
712 /* Handle special cases text/plain and application/octet-stream */
713 if (strcmp (ubase, "text/plain") == 0 &&
714 strncmp (umime, "text/", 5) == 0)
715 return 1;
717 if (strcmp (ubase, "application/octet-stream") == 0)
718 return 1;
720 for (i = 0; i < n_caches; i++)
722 XdgMimeCache *cache = caches[i];
724 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 8);
725 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
726 xdg_uint32_t offset, n_parents, parent_offset;
728 min = 0;
729 max = n_entries - 1;
730 while (max >= min)
732 med = (min + max)/2;
734 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * med);
735 cmp = strcmp (cache->buffer + offset, umime);
736 if (cmp < 0)
737 min = med + 1;
738 else if (cmp > 0)
739 max = med - 1;
740 else
742 offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * med + 4);
743 n_parents = GET_UINT32 (cache->buffer, offset);
745 for (j = 0; j < n_parents; j++)
747 parent_offset = GET_UINT32 (cache->buffer, offset + 4 + 4 * j);
748 if (_xdg_mime_cache_mime_type_subclass (cache->buffer + parent_offset, ubase))
749 return 1;
752 break;
757 return 0;
/* Resolve MIME through the alias lists of the loaded caches.  Returns
 * the alias target when MIME is a known alias, and MIME itself when it
 * is not. */
const char *
_xdg_mime_cache_unalias_mime_type (const char *mime)
{
  const char *target = cache_alias_lookup (mime);

  return target ? target : mime;
}
773 char **
774 _xdg_mime_cache_list_mime_parents (const char *mime)
776 int i, j, p;
777 char *all_parents[128]; /* we'll stop at 128 */
778 char **result;
780 p = 0;
781 for (i = 0; i < n_caches; i++)
783 XdgMimeCache *cache = caches[i];
785 xdg_uint32_t list_offset = GET_UINT32 (cache->buffer, 8);
786 xdg_uint32_t n_entries = GET_UINT32 (cache->buffer, list_offset);
788 for (j = 0; j < n_entries; j++)
790 xdg_uint32_t mimetype_offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * i);
791 xdg_uint32_t parents_offset = GET_UINT32 (cache->buffer, list_offset + 4 + 8 * i + 4);
793 if (strcmp (cache->buffer + mimetype_offset, mime) == 0)
795 xdg_uint32_t n_parents = GET_UINT32 (cache->buffer, parents_offset);
797 for (j = 0; j < n_parents; j++)
798 all_parents[p++] = cache->buffer + parents_offset + 4 + 4 * j;
800 break;
804 all_parents[p++] = 0;
806 result = (char **) malloc (p * sizeof (char *));
807 memcpy (result, all_parents, p * sizeof (char *));
809 return result;