Import libarchive-2.5.4b.
[dragonfly.git] / contrib / libarchive-2 / libarchive / archive_entry_link_resolver.c
blobc0770c239de2d4f8bcc0a3f5370c169787fb54a5
1 /*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.1 2007/12/30 04:58:21 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
30 #include <sys/stat.h>
31 #endif
32 #ifdef HAVE_ERRNO_H
33 #include <errno.h>
34 #endif
35 #include <stdio.h>
36 #ifdef HAVE_STDLIB_H
37 #include <stdlib.h>
38 #endif
39 #ifdef HAVE_STRING_H
40 #include <string.h>
41 #endif
43 #include "archive.h"
44 #include "archive_entry.h"
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links. These strategies match those used by various
50 * archiving formats:
51 * tar - content stored with first link, remainder refer back to it.
52 * This requires us to match each subsequent link up with the
53 * first appearance.
54 * cpio - Old cpio just stored body with each link, match-ups were
55 * implicit. This is trivial.
56 * new cpio - New cpio only stores body with last link, match-ups
57 * are implicit. This is actually quite tricky; see the notes
58 * below.
61 /* Users pass us a format code, we translate that into a strategy here. */
62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
67 /* Initial size of link cache. */
68 #define links_cache_initial_size 1024
70 struct links_entry {
71 struct links_entry *next;
72 struct links_entry *previous;
73 int links; /* # links not yet seen */
74 int hash;
75 struct archive_entry *canonical;
76 struct archive_entry *entry;
79 struct archive_entry_linkresolver {
80 struct links_entry **buckets;
81 struct links_entry *spare;
82 unsigned long number_entries;
83 size_t number_buckets;
84 int strategy;
87 static struct links_entry *find_entry(struct archive_entry_linkresolver *,
88 struct archive_entry *);
89 static void grow_hash(struct archive_entry_linkresolver *);
90 static struct links_entry *insert_entry(struct archive_entry_linkresolver *,
91 struct archive_entry *);
92 static struct links_entry *next_entry(struct archive_entry_linkresolver *);
94 struct archive_entry_linkresolver *
95 archive_entry_linkresolver_new(void)
97 struct archive_entry_linkresolver *res;
98 size_t i;
100 res = malloc(sizeof(struct archive_entry_linkresolver));
101 if (res == NULL)
102 return (NULL);
103 memset(res, 0, sizeof(struct archive_entry_linkresolver));
104 res->number_buckets = links_cache_initial_size;
105 res->buckets = malloc(res->number_buckets *
106 sizeof(res->buckets[0]));
107 if (res->buckets == NULL) {
108 free(res);
109 return (NULL);
111 for (i = 0; i < res->number_buckets; i++)
112 res->buckets[i] = NULL;
113 return (res);
116 void
117 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver *res,
118 int fmt)
120 int fmtbase = fmt & ARCHIVE_FORMAT_BASE_MASK;
122 switch (fmtbase) {
123 case ARCHIVE_FORMAT_CPIO:
124 switch (fmt) {
125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC:
126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC:
127 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO;
128 break;
129 default:
130 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO;
131 break;
133 break;
134 case ARCHIVE_FORMAT_MTREE:
135 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE;
136 break;
137 case ARCHIVE_FORMAT_TAR:
138 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
139 break;
140 default:
141 res->strategy = ARCHIVE_ENTRY_LINKIFY_LIKE_TAR;
142 break;
146 void
147 archive_entry_linkresolver_free(struct archive_entry_linkresolver *res)
149 struct links_entry *le;
151 if (res == NULL)
152 return;
154 if (res->buckets != NULL) {
155 while ((le = next_entry(res)) != NULL)
156 archive_entry_free(le->entry);
157 free(res->buckets);
158 res->buckets = NULL;
160 free(res);
163 void
164 archive_entry_linkify(struct archive_entry_linkresolver *res,
165 struct archive_entry **e, struct archive_entry **f)
167 struct links_entry *le;
168 struct archive_entry *t;
170 *f = NULL; /* Default: Don't return a second entry. */
172 if (*e == NULL) {
173 le = next_entry(res);
174 if (le != NULL) {
175 *e = le->entry;
176 le->entry = NULL;
178 return;
181 /* If it has only one link, then we're done. */
182 if (archive_entry_nlink(*e) == 1)
183 return;
185 switch (res->strategy) {
186 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR:
187 le = find_entry(res, *e);
188 if (le != NULL) {
189 archive_entry_set_size(*e, 0);
190 archive_entry_copy_hardlink(*e,
191 archive_entry_pathname(le->canonical));
192 } else
193 insert_entry(res, *e);
194 return;
195 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE:
196 le = find_entry(res, *e);
197 if (le != NULL) {
198 archive_entry_copy_hardlink(*e,
199 archive_entry_pathname(le->canonical));
200 } else
201 insert_entry(res, *e);
202 return;
203 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO:
204 /* This one is trivial. */
205 return;
206 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO:
207 le = find_entry(res, *e);
208 if (le != NULL) {
210 * Put the new entry in le, return the
211 * old entry from le.
213 t = *e;
214 *e = le->entry;
215 le->entry = t;
216 /* Make the old entry into a hardlink. */
217 archive_entry_set_size(*e, 0);
218 archive_entry_copy_hardlink(*e,
219 archive_entry_pathname(le->canonical));
220 /* If we ran out of links, return the
221 * final entry as well. */
222 if (le->links == 0) {
223 *f = le->entry;
224 le->entry = NULL;
226 } else {
228 * If we haven't seen it, tuck it away
229 * for future use.
231 le = insert_entry(res, *e);
232 le->entry = *e;
233 *e = NULL;
235 return;
236 default:
237 break;
239 return;
242 static struct links_entry *
243 find_entry(struct archive_entry_linkresolver *res,
244 struct archive_entry *entry)
246 struct links_entry *le;
247 int hash, bucket;
248 dev_t dev;
249 ino_t ino;
251 /* Free a held entry. */
252 if (res->spare != NULL) {
253 archive_entry_free(res->spare->canonical);
254 archive_entry_free(res->spare->entry);
255 free(res->spare);
256 res->spare = NULL;
259 /* If the links cache overflowed and got flushed, don't bother. */
260 if (res->buckets == NULL)
261 return (NULL);
263 dev = archive_entry_dev(entry);
264 ino = archive_entry_ino(entry);
265 hash = dev ^ ino;
267 /* Try to locate this entry in the links cache. */
268 bucket = hash % res->number_buckets;
269 for (le = res->buckets[bucket]; le != NULL; le = le->next) {
270 if (le->hash == hash
271 && dev == archive_entry_dev(le->canonical)
272 && ino == archive_entry_ino(le->canonical)) {
274 * Decrement link count each time and release
275 * the entry if it hits zero. This saves
276 * memory and is necessary for detecting
277 * missed links.
279 --le->links;
280 if (le->links > 0)
281 return (le);
282 /* Remove it from this hash bucket. */
283 if (le->previous != NULL)
284 le->previous->next = le->next;
285 if (le->next != NULL)
286 le->next->previous = le->previous;
287 if (res->buckets[bucket] == le)
288 res->buckets[bucket] = le->next;
289 res->number_entries--;
290 /* Defer freeing this entry. */
291 res->spare = le;
292 return (le);
295 return (NULL);
298 static struct links_entry *
299 next_entry(struct archive_entry_linkresolver *res)
301 struct links_entry *le;
302 size_t bucket;
304 /* Free a held entry. */
305 if (res->spare != NULL) {
306 archive_entry_free(res->spare->canonical);
307 free(res->spare);
308 res->spare = NULL;
311 /* If the links cache overflowed and got flushed, don't bother. */
312 if (res->buckets == NULL)
313 return (NULL);
315 /* Look for next non-empty bucket in the links cache. */
316 for (bucket = 0; bucket < res->number_buckets; bucket++) {
317 le = res->buckets[bucket];
318 if (le != NULL) {
319 /* Remove it from this hash bucket. */
320 if (le->next != NULL)
321 le->next->previous = le->previous;
322 res->buckets[bucket] = le->next;
323 res->number_entries--;
324 /* Defer freeing this entry. */
325 res->spare = le;
326 return (le);
329 return (NULL);
332 static struct links_entry *
333 insert_entry(struct archive_entry_linkresolver *res,
334 struct archive_entry *entry)
336 struct links_entry *le;
337 int hash, bucket;
339 /* Add this entry to the links cache. */
340 le = malloc(sizeof(struct links_entry));
341 if (le == NULL)
342 return (NULL);
343 memset(le, 0, sizeof(*le));
344 le->canonical = archive_entry_clone(entry);
346 /* If the links cache is getting too full, enlarge the hash table. */
347 if (res->number_entries > res->number_buckets * 2)
348 grow_hash(res);
350 hash = archive_entry_dev(entry) ^ archive_entry_ino(entry);
351 bucket = hash % res->number_buckets;
353 /* If we could allocate the entry, record it. */
354 if (res->buckets[bucket] != NULL)
355 res->buckets[bucket]->previous = le;
356 res->number_entries++;
357 le->next = res->buckets[bucket];
358 le->previous = NULL;
359 res->buckets[bucket] = le;
360 le->hash = hash;
361 le->links = archive_entry_nlink(entry) - 1;
362 return (le);
365 static void
366 grow_hash(struct archive_entry_linkresolver *res)
368 struct links_entry *le, **new_buckets;
369 size_t new_size;
370 size_t i, bucket;
372 /* Try to enlarge the bucket list. */
373 new_size = res->number_buckets * 2;
374 new_buckets = malloc(new_size * sizeof(struct links_entry *));
376 if (new_buckets != NULL) {
377 memset(new_buckets, 0,
378 new_size * sizeof(struct links_entry *));
379 for (i = 0; i < res->number_buckets; i++) {
380 while (res->buckets[i] != NULL) {
381 /* Remove entry from old bucket. */
382 le = res->buckets[i];
383 res->buckets[i] = le->next;
385 /* Add entry to new bucket. */
386 bucket = le->hash % new_size;
388 if (new_buckets[bucket] != NULL)
389 new_buckets[bucket]->previous =
391 le->next = new_buckets[bucket];
392 le->previous = NULL;
393 new_buckets[bucket] = le;
396 free(res->buckets);
397 res->buckets = new_buckets;
398 res->number_buckets = new_size;