2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.1 2007/12/30 04:58:21 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
44 #include "archive_entry.h"
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links. These strategies match those used by various
51 * tar - content stored with first link, remainder refer back to it.
52 * This requires us to match each subsequent link up with the
54 * cpio - Old cpio just stored body with each link, match-ups were
55 * implicit. This is trivial.
56 * new cpio - New cpio only stores body with last link, match-ups
57 * are implicit. This is actually quite tricky; see the notes
61 /* Users pass us a format code, we translate that into a strategy here. */
62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
67 /* Initial size of link cache. */
68 #define links_cache_initial_size 1024
71 struct links_entry
*next
;
72 struct links_entry
*previous
;
73 int links
; /* # links not yet seen */
75 struct archive_entry
*canonical
;
76 struct archive_entry
*entry
;
79 struct archive_entry_linkresolver
{
80 struct links_entry
**buckets
;
81 struct links_entry
*spare
;
82 unsigned long number_entries
;
83 size_t number_buckets
;
87 static struct links_entry
*find_entry(struct archive_entry_linkresolver
*,
88 struct archive_entry
*);
89 static void grow_hash(struct archive_entry_linkresolver
*);
90 static struct links_entry
*insert_entry(struct archive_entry_linkresolver
*,
91 struct archive_entry
*);
92 static struct links_entry
*next_entry(struct archive_entry_linkresolver
*);
94 struct archive_entry_linkresolver
*
95 archive_entry_linkresolver_new(void)
97 struct archive_entry_linkresolver
*res
;
100 res
= malloc(sizeof(struct archive_entry_linkresolver
));
103 memset(res
, 0, sizeof(struct archive_entry_linkresolver
));
104 res
->number_buckets
= links_cache_initial_size
;
105 res
->buckets
= malloc(res
->number_buckets
*
106 sizeof(res
->buckets
[0]));
107 if (res
->buckets
== NULL
) {
111 for (i
= 0; i
< res
->number_buckets
; i
++)
112 res
->buckets
[i
] = NULL
;
117 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver
*res
,
120 int fmtbase
= fmt
& ARCHIVE_FORMAT_BASE_MASK
;
123 case ARCHIVE_FORMAT_CPIO
:
125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC
:
126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC
:
127 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO
;
130 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO
;
134 case ARCHIVE_FORMAT_MTREE
:
135 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE
;
137 case ARCHIVE_FORMAT_TAR
:
138 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_TAR
;
141 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_TAR
;
147 archive_entry_linkresolver_free(struct archive_entry_linkresolver
*res
)
149 struct links_entry
*le
;
154 if (res
->buckets
!= NULL
) {
155 while ((le
= next_entry(res
)) != NULL
)
156 archive_entry_free(le
->entry
);
164 archive_entry_linkify(struct archive_entry_linkresolver
*res
,
165 struct archive_entry
**e
, struct archive_entry
**f
)
167 struct links_entry
*le
;
168 struct archive_entry
*t
;
170 *f
= NULL
; /* Default: Don't return a second entry. */
173 le
= next_entry(res
);
181 /* If it has only one link, then we're done. */
182 if (archive_entry_nlink(*e
) == 1)
185 switch (res
->strategy
) {
186 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR
:
187 le
= find_entry(res
, *e
);
189 archive_entry_set_size(*e
, 0);
190 archive_entry_copy_hardlink(*e
,
191 archive_entry_pathname(le
->canonical
));
193 insert_entry(res
, *e
);
195 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE
:
196 le
= find_entry(res
, *e
);
198 archive_entry_copy_hardlink(*e
,
199 archive_entry_pathname(le
->canonical
));
201 insert_entry(res
, *e
);
203 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO
:
204 /* This one is trivial. */
206 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO
:
207 le
= find_entry(res
, *e
);
210 * Put the new entry in le, return the
216 /* Make the old entry into a hardlink. */
217 archive_entry_set_size(*e
, 0);
218 archive_entry_copy_hardlink(*e
,
219 archive_entry_pathname(le
->canonical
));
220 /* If we ran out of links, return the
221 * final entry as well. */
222 if (le
->links
== 0) {
228 * If we haven't seen it, tuck it away
231 le
= insert_entry(res
, *e
);
242 static struct links_entry
*
243 find_entry(struct archive_entry_linkresolver
*res
,
244 struct archive_entry
*entry
)
246 struct links_entry
*le
;
251 /* Free a held entry. */
252 if (res
->spare
!= NULL
) {
253 archive_entry_free(res
->spare
->canonical
);
254 archive_entry_free(res
->spare
->entry
);
259 /* If the links cache overflowed and got flushed, don't bother. */
260 if (res
->buckets
== NULL
)
263 dev
= archive_entry_dev(entry
);
264 ino
= archive_entry_ino(entry
);
267 /* Try to locate this entry in the links cache. */
268 bucket
= hash
% res
->number_buckets
;
269 for (le
= res
->buckets
[bucket
]; le
!= NULL
; le
= le
->next
) {
271 && dev
== archive_entry_dev(le
->canonical
)
272 && ino
== archive_entry_ino(le
->canonical
)) {
274 * Decrement link count each time and release
275 * the entry if it hits zero. This saves
276 * memory and is necessary for detecting
282 /* Remove it from this hash bucket. */
283 if (le
->previous
!= NULL
)
284 le
->previous
->next
= le
->next
;
285 if (le
->next
!= NULL
)
286 le
->next
->previous
= le
->previous
;
287 if (res
->buckets
[bucket
] == le
)
288 res
->buckets
[bucket
] = le
->next
;
289 res
->number_entries
--;
290 /* Defer freeing this entry. */
298 static struct links_entry
*
299 next_entry(struct archive_entry_linkresolver
*res
)
301 struct links_entry
*le
;
304 /* Free a held entry. */
305 if (res
->spare
!= NULL
) {
306 archive_entry_free(res
->spare
->canonical
);
311 /* If the links cache overflowed and got flushed, don't bother. */
312 if (res
->buckets
== NULL
)
315 /* Look for next non-empty bucket in the links cache. */
316 for (bucket
= 0; bucket
< res
->number_buckets
; bucket
++) {
317 le
= res
->buckets
[bucket
];
319 /* Remove it from this hash bucket. */
320 if (le
->next
!= NULL
)
321 le
->next
->previous
= le
->previous
;
322 res
->buckets
[bucket
] = le
->next
;
323 res
->number_entries
--;
324 /* Defer freeing this entry. */
332 static struct links_entry
*
333 insert_entry(struct archive_entry_linkresolver
*res
,
334 struct archive_entry
*entry
)
336 struct links_entry
*le
;
339 /* Add this entry to the links cache. */
340 le
= malloc(sizeof(struct links_entry
));
343 memset(le
, 0, sizeof(*le
));
344 le
->canonical
= archive_entry_clone(entry
);
346 /* If the links cache is getting too full, enlarge the hash table. */
347 if (res
->number_entries
> res
->number_buckets
* 2)
350 hash
= archive_entry_dev(entry
) ^ archive_entry_ino(entry
);
351 bucket
= hash
% res
->number_buckets
;
353 /* If we could allocate the entry, record it. */
354 if (res
->buckets
[bucket
] != NULL
)
355 res
->buckets
[bucket
]->previous
= le
;
356 res
->number_entries
++;
357 le
->next
= res
->buckets
[bucket
];
359 res
->buckets
[bucket
] = le
;
361 le
->links
= archive_entry_nlink(entry
) - 1;
366 grow_hash(struct archive_entry_linkresolver
*res
)
368 struct links_entry
*le
, **new_buckets
;
372 /* Try to enlarge the bucket list. */
373 new_size
= res
->number_buckets
* 2;
374 new_buckets
= malloc(new_size
* sizeof(struct links_entry
*));
376 if (new_buckets
!= NULL
) {
377 memset(new_buckets
, 0,
378 new_size
* sizeof(struct links_entry
*));
379 for (i
= 0; i
< res
->number_buckets
; i
++) {
380 while (res
->buckets
[i
] != NULL
) {
381 /* Remove entry from old bucket. */
382 le
= res
->buckets
[i
];
383 res
->buckets
[i
] = le
->next
;
385 /* Add entry to new bucket. */
386 bucket
= le
->hash
% new_size
;
388 if (new_buckets
[bucket
] != NULL
)
389 new_buckets
[bucket
]->previous
=
391 le
->next
= new_buckets
[bucket
];
393 new_buckets
[bucket
] = le
;
397 res
->buckets
= new_buckets
;
398 res
->number_buckets
= new_size
;