diff(1): Commit our patches to contrib/ and get rid of -I-.
[dragonfly.git] / contrib / libarchive-2 / libarchive / archive_entry_link_resolver.c
blob4deee260d498891030d911be5dbcd63d505ead9e
1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.3 2008/06/15 04:31:43 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
30 #include <sys/stat.h>
31 #endif
32 #ifdef HAVE_ERRNO_H
33 #include <errno.h>
34 #endif
35 #include <stdio.h>
36 #ifdef HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #ifdef HAVE_STRING_H
40 #include <string.h>
41 #endif
43 #include "archive.h"
44 #include "archive_entry.h"
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links. These strategies match those used by various
50 * archiving formats:
51 * tar - content stored with first link, remainder refer back to it.
52 * This requires us to match each subsequent link up with the
53 * first appearance.
54 * cpio - Old cpio just stored body with each link, match-ups were
55 * implicit. This is trivial.
56 * new cpio - New cpio only stores body with last link, match-ups
57 * are implicit. This is actually quite tricky; see the notes
58 * below.
61 /* Users pass us a format code, we translate that into a strategy here. */
62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
67 /* Initial size of link cache. */
68 #define links_cache_initial_size 1024
70 struct links_entry {
71 struct links_entry *next;
72 struct links_entry *previous;
73 int links; /* # links not yet seen */
74 int hash;
75 struct archive_entry *canonical;
76 struct archive_entry *entry;
79 struct archive_entry_linkresolver {
80 struct links_entry **buckets;
81 struct links_entry *spare;
82 unsigned long number_entries;
83 size_t number_buckets;
84 int strategy;
87 static struct links_entry *find_entry(struct archive_entry_linkresolver *,
88 struct archive_entry *);
89 static void grow_hash(struct archive_entry_linkresolver *);
90 static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
91 struct archive_entry *);
92 static struct links_entry *next_entry(struct archive_entry_linkresolver *);
94 struct archive_entry_linkresolver *
95 archive_entry_linkresolver_new(void)
97 struct archive_entry_linkresolver *res;
98 size_t i;
100 res = malloc(sizeof(struct archive_entry_linkresolver));
101 if (res == NULL)
102 return (NULL);
103 memset(res, 0, sizeof(struct archive_entry_linkresolver));
104 res->number_buckets = links_cache_initial_size;
105 res->buckets = malloc(res->number_buckets *
106 sizeof(res->buckets[0]));
107 if (res->buckets == NULL) {
108 free(res);
109 return (NULL);
111 for (i = 0; i < res->number_buckets; i++)
112 res->buckets[i] = NULL;
113 return (res);
116 void
117 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
118 int fmt)
120 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
122 switch (fmtbase) {
123 case ARCHIVE_FORMAT_CPIO:
124 switch (fmt) {
125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
127 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
128 break;
129 default:
130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131 break;
133 break;
134 case ARCHIVE_FORMAT_MTREE:
135 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
136 break;
137 case ARCHIVE_FORMAT_TAR:
138 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
139 break;
140 default:
141 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
142 break;
146 void
147 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
149 struct links_entry *le;
151 if (res == NULL)
152 return;
154 if (res->buckets != NULL) {
155 while ((le = next_entry(res)) != NULL)
156 archive_entry_free(le->entry);
157 free(res->buckets);
158 res->buckets = NULL;
160 free(res);
163 void
164 archive_entry_linkify(struct archive_entry_linkresolver *res,
165 struct archive_entry **e, struct archive_entry **f)
167 struct links_entry *le;
168 struct archive_entry *t;
170 *f = NULL; /* Default: Don't return a second entry. */
172 if (*e == NULL) {
173 le = next_entry(res);
174 if (le != NULL) {
175 *e = le->entry;
176 le->entry = NULL;
178 return;
181 /* If it has only one link, then we're done. */
182 if (archive_entry_nlink(*e) == 1)
183 return;
184 /* Directories never have hardlinks. */
185 if (archive_entry_filetype(*e) == AE_IFDIR)
186 return;
188 switch (res->strategy) {
189 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
190 le = find_entry(res, *e);
191 if (le != NULL) {
192 archive_entry_set_size(*e, 0);
193 archive_entry_copy_hardlink(*e,
194 archive_entry_pathname(le->canonical));
195 } else
196 insert_entry(res, *e);
197 return;
198 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
199 le = find_entry(res, *e);
200 if (le != NULL) {
201 archive_entry_copy_hardlink(*e,
202 archive_entry_pathname(le->canonical));
203 } else
204 insert_entry(res, *e);
205 return;
206 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
207 /* This one is trivial. */
208 return;
209 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
210 le = find_entry(res, *e);
211 if (le != NULL) {
213 * Put the new entry in le, return the
214 * old entry from le.
216 t = *e;
217 *e = le->entry;
218 le->entry = t;
219 /* Make the old entry into a hardlink. */
220 archive_entry_set_size(*e, 0);
221 archive_entry_copy_hardlink(*e,
222 archive_entry_pathname(le->canonical));
223 /* If we ran out of links, return the
224 * final entry as well. */
225 if (le->links == 0) {
226 *f = le->entry;
227 le->entry = NULL;
229 } else {
231 * If we haven't seen it, tuck it away
232 * for future use.
234 le = insert_entry(res, *e);
235 le->entry = *e;
236 *e = NULL;
238 return;
239 default:
240 break;
242 return;
245 static struct links_entry *
246 find_entry(struct archive_entry_linkresolver *res,
247 struct archive_entry *entry)
249 struct links_entry *le;
250 int hash, bucket;
251 dev_t dev;
252 ino_t ino;
254 /* Free a held entry. */
255 if (res->spare != NULL) {
256 archive_entry_free(res->spare->canonical);
257 archive_entry_free(res->spare->entry);
258 free(res->spare);
259 res->spare = NULL;
262 /* If the links cache overflowed and got flushed, don't bother. */
263 if (res->buckets == NULL)
264 return (NULL);
266 dev = archive_entry_dev(entry);
267 ino = archive_entry_ino(entry);
268 hash = dev ^ ino;
270 /* Try to locate this entry in the links cache. */
271 bucket = hash % res->number_buckets;
272 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
273 if (le->hash == hash
274 && dev == archive_entry_dev(le->canonical)
275 && ino == archive_entry_ino(le->canonical)) {
277 * Decrement link count each time and release
278 * the entry if it hits zero. This saves
279 * memory and is necessary for detecting
280 * missed links.
282 --le->links;
283 if (le->links > 0)
284 return (le);
285 /* Remove it from this hash bucket. */
286 if (le->previous != NULL)
287 le->previous->next = le->next;
288 if (le->next != NULL)
289 le->next->previous = le->previous;
290 if (res->buckets[bucket] == le)
291 res->buckets[bucket] = le->next;
292 res->number_entries--;
293 /* Defer freeing this entry. */
294 res->spare = le;
295 return (le);
298 return (NULL);
301 static struct links_entry *
302 next_entry(struct archive_entry_linkresolver *res)
304 struct links_entry *le;
305 size_t bucket;
307 /* Free a held entry. */
308 if (res->spare != NULL) {
309 archive_entry_free(res->spare->canonical);
310 free(res->spare);
311 res->spare = NULL;
314 /* If the links cache overflowed and got flushed, don't bother. */
315 if (res->buckets == NULL)
316 return (NULL);
318 /* Look for next non-empty bucket in the links cache. */
319 for (bucket = 0; bucket < res->number_buckets; bucket++) {
320 le = res->buckets[bucket];
321 if (le != NULL) {
322 /* Remove it from this hash bucket. */
323 if (le->next != NULL)
324 le->next->previous = le->previous;
325 res->buckets[bucket] = le->next;
326 res->number_entries--;
327 /* Defer freeing this entry. */
328 res->spare = le;
329 return (le);
332 return (NULL);
335 static struct links_entry *
336 insert_entry(struct archive_entry_linkresolver *res,
337 struct archive_entry *entry)
339 struct links_entry *le;
340 int hash, bucket;
342 /* Add this entry to the links cache. */
343 le = malloc(sizeof(struct links_entry));
344 if (le == NULL)
345 return (NULL);
346 memset(le, 0, sizeof(*le));
347 le->canonical = archive_entry_clone(entry);
349 /* If the links cache is getting too full, enlarge the hash table. */
350 if (res->number_entries > res->number_buckets * 2)
351 grow_hash(res);
353 hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
354 bucket = hash % res->number_buckets;
356 /* If we could allocate the entry, record it. */
357 if (res->buckets[bucket] != NULL)
358 res->buckets[bucket]->previous = le;
359 res->number_entries++;
360 le->next = res->buckets[bucket];
361 le->previous = NULL;
362 res->buckets[bucket] = le;
363 le->hash = hash;
364 le->links = archive_entry_nlink(entry) - 1;
365 return (le);
368 static void
369 grow_hash(struct archive_entry_linkresolver *res)
371 struct links_entry *le, **new_buckets;
372 size_t new_size;
373 size_t i, bucket;
375 /* Try to enlarge the bucket list. */
376 new_size = res->number_buckets * 2;
377 new_buckets = malloc(new_size * sizeof(struct links_entry *));
379 if (new_buckets != NULL) {
380 memset(new_buckets, 0,
381 new_size * sizeof(struct links_entry *));
382 for (i = 0; i < res->number_buckets; i++) {
383 while (res->buckets[i] != NULL) {
384 /* Remove entry from old bucket. */
385 le = res->buckets[i];
386 res->buckets[i] = le->next;
388 /* Add entry to new bucket. */
389 bucket = le->hash % new_size;
391 if (new_buckets[bucket] != NULL)
392 new_buckets[bucket]->previous =
394 le->next = new_buckets[bucket];
395 le->previous = NULL;
396 new_buckets[bucket] = le;
399 free(res->buckets);
400 res->buckets = new_buckets;
401 res->number_buckets = new_size;