2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
27 __FBSDID("$FreeBSD: src/lib/libarchive/archive_entry_link_resolver.c,v 1.3 2008/06/15 04:31:43 kientzle Exp $");
29 #ifdef HAVE_SYS_STAT_H
44 #include "archive_entry.h"
47 * This is mostly a pretty straightforward hash table implementation.
48 * The only interesting bit is the different strategies used to
49 * match up links. These strategies match those used by various
51 * tar - content stored with first link, remainder refer back to it.
52 * This requires us to match each subsequent link up with the
54 * cpio - Old cpio just stored body with each link, match-ups were
55 * implicit. This is trivial.
56 * new cpio - New cpio only stores body with last link, match-ups
57 * are implicit. This is actually quite tricky; see the notes
61 /* Users pass us a format code, we translate that into a strategy here. */
62 #define ARCHIVE_ENTRY_LINKIFY_LIKE_TAR 0
63 #define ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE 1
64 #define ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO 2
65 #define ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO 3
67 /* Initial size of link cache. */
68 #define links_cache_initial_size 1024
71 struct links_entry
*next
;
72 struct links_entry
*previous
;
73 int links
; /* # links not yet seen */
75 struct archive_entry
*canonical
;
76 struct archive_entry
*entry
;
79 struct archive_entry_linkresolver
{
80 struct links_entry
**buckets
;
81 struct links_entry
*spare
;
82 unsigned long number_entries
;
83 size_t number_buckets
;
87 static struct links_entry
*find_entry(struct archive_entry_linkresolver
*,
88 struct archive_entry
*);
89 static void grow_hash(struct archive_entry_linkresolver
*);
90 static struct links_entry
*insert_entry(struct archive_entry_linkresolver
*,
91 struct archive_entry
*);
92 static struct links_entry
*next_entry(struct archive_entry_linkresolver
*);
94 struct archive_entry_linkresolver
*
95 archive_entry_linkresolver_new(void)
97 struct archive_entry_linkresolver
*res
;
100 res
= malloc(sizeof(struct archive_entry_linkresolver
));
103 memset(res
, 0, sizeof(struct archive_entry_linkresolver
));
104 res
->number_buckets
= links_cache_initial_size
;
105 res
->buckets
= malloc(res
->number_buckets
*
106 sizeof(res
->buckets
[0]));
107 if (res
->buckets
== NULL
) {
111 for (i
= 0; i
< res
->number_buckets
; i
++)
112 res
->buckets
[i
] = NULL
;
117 archive_entry_linkresolver_set_strategy(struct archive_entry_linkresolver
*res
,
120 int fmtbase
= fmt
& ARCHIVE_FORMAT_BASE_MASK
;
123 case ARCHIVE_FORMAT_CPIO
:
125 case ARCHIVE_FORMAT_CPIO_SVR4_NOCRC
:
126 case ARCHIVE_FORMAT_CPIO_SVR4_CRC
:
127 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO
;
130 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO
;
134 case ARCHIVE_FORMAT_MTREE
:
135 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE
;
137 case ARCHIVE_FORMAT_TAR
:
138 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_TAR
;
141 res
->strategy
= ARCHIVE_ENTRY_LINKIFY_LIKE_TAR
;
147 archive_entry_linkresolver_free(struct archive_entry_linkresolver
*res
)
149 struct links_entry
*le
;
154 if (res
->buckets
!= NULL
) {
155 while ((le
= next_entry(res
)) != NULL
)
156 archive_entry_free(le
->entry
);
164 archive_entry_linkify(struct archive_entry_linkresolver
*res
,
165 struct archive_entry
**e
, struct archive_entry
**f
)
167 struct links_entry
*le
;
168 struct archive_entry
*t
;
170 *f
= NULL
; /* Default: Don't return a second entry. */
173 le
= next_entry(res
);
181 /* If it has only one link, then we're done. */
182 if (archive_entry_nlink(*e
) == 1)
184 /* Directories never have hardlinks. */
185 if (archive_entry_filetype(*e
) == AE_IFDIR
)
188 switch (res
->strategy
) {
189 case ARCHIVE_ENTRY_LINKIFY_LIKE_TAR
:
190 le
= find_entry(res
, *e
);
192 archive_entry_set_size(*e
, 0);
193 archive_entry_copy_hardlink(*e
,
194 archive_entry_pathname(le
->canonical
));
196 insert_entry(res
, *e
);
198 case ARCHIVE_ENTRY_LINKIFY_LIKE_MTREE
:
199 le
= find_entry(res
, *e
);
201 archive_entry_copy_hardlink(*e
,
202 archive_entry_pathname(le
->canonical
));
204 insert_entry(res
, *e
);
206 case ARCHIVE_ENTRY_LINKIFY_LIKE_OLD_CPIO
:
207 /* This one is trivial. */
209 case ARCHIVE_ENTRY_LINKIFY_LIKE_NEW_CPIO
:
210 le
= find_entry(res
, *e
);
213 * Put the new entry in le, return the
219 /* Make the old entry into a hardlink. */
220 archive_entry_set_size(*e
, 0);
221 archive_entry_copy_hardlink(*e
,
222 archive_entry_pathname(le
->canonical
));
223 /* If we ran out of links, return the
224 * final entry as well. */
225 if (le
->links
== 0) {
231 * If we haven't seen it, tuck it away
234 le
= insert_entry(res
, *e
);
245 static struct links_entry
*
246 find_entry(struct archive_entry_linkresolver
*res
,
247 struct archive_entry
*entry
)
249 struct links_entry
*le
;
254 /* Free a held entry. */
255 if (res
->spare
!= NULL
) {
256 archive_entry_free(res
->spare
->canonical
);
257 archive_entry_free(res
->spare
->entry
);
262 /* If the links cache overflowed and got flushed, don't bother. */
263 if (res
->buckets
== NULL
)
266 dev
= archive_entry_dev(entry
);
267 ino
= archive_entry_ino(entry
);
270 /* Try to locate this entry in the links cache. */
271 bucket
= hash
% res
->number_buckets
;
272 for (le
= res
->buckets
[bucket
]; le
!= NULL
; le
= le
->next
) {
274 && dev
== archive_entry_dev(le
->canonical
)
275 && ino
== archive_entry_ino(le
->canonical
)) {
277 * Decrement link count each time and release
278 * the entry if it hits zero. This saves
279 * memory and is necessary for detecting
285 /* Remove it from this hash bucket. */
286 if (le
->previous
!= NULL
)
287 le
->previous
->next
= le
->next
;
288 if (le
->next
!= NULL
)
289 le
->next
->previous
= le
->previous
;
290 if (res
->buckets
[bucket
] == le
)
291 res
->buckets
[bucket
] = le
->next
;
292 res
->number_entries
--;
293 /* Defer freeing this entry. */
301 static struct links_entry
*
302 next_entry(struct archive_entry_linkresolver
*res
)
304 struct links_entry
*le
;
307 /* Free a held entry. */
308 if (res
->spare
!= NULL
) {
309 archive_entry_free(res
->spare
->canonical
);
314 /* If the links cache overflowed and got flushed, don't bother. */
315 if (res
->buckets
== NULL
)
318 /* Look for next non-empty bucket in the links cache. */
319 for (bucket
= 0; bucket
< res
->number_buckets
; bucket
++) {
320 le
= res
->buckets
[bucket
];
322 /* Remove it from this hash bucket. */
323 if (le
->next
!= NULL
)
324 le
->next
->previous
= le
->previous
;
325 res
->buckets
[bucket
] = le
->next
;
326 res
->number_entries
--;
327 /* Defer freeing this entry. */
335 static struct links_entry
*
336 insert_entry(struct archive_entry_linkresolver
*res
,
337 struct archive_entry
*entry
)
339 struct links_entry
*le
;
342 /* Add this entry to the links cache. */
343 le
= malloc(sizeof(struct links_entry
));
346 memset(le
, 0, sizeof(*le
));
347 le
->canonical
= archive_entry_clone(entry
);
349 /* If the links cache is getting too full, enlarge the hash table. */
350 if (res
->number_entries
> res
->number_buckets
* 2)
353 hash
= archive_entry_dev(entry
) ^ archive_entry_ino(entry
);
354 bucket
= hash
% res
->number_buckets
;
356 /* If we could allocate the entry, record it. */
357 if (res
->buckets
[bucket
] != NULL
)
358 res
->buckets
[bucket
]->previous
= le
;
359 res
->number_entries
++;
360 le
->next
= res
->buckets
[bucket
];
362 res
->buckets
[bucket
] = le
;
364 le
->links
= archive_entry_nlink(entry
) - 1;
369 grow_hash(struct archive_entry_linkresolver
*res
)
371 struct links_entry
*le
, **new_buckets
;
375 /* Try to enlarge the bucket list. */
376 new_size
= res
->number_buckets
* 2;
377 new_buckets
= malloc(new_size
* sizeof(struct links_entry
*));
379 if (new_buckets
!= NULL
) {
380 memset(new_buckets
, 0,
381 new_size
* sizeof(struct links_entry
*));
382 for (i
= 0; i
< res
->number_buckets
; i
++) {
383 while (res
->buckets
[i
] != NULL
) {
384 /* Remove entry from old bucket. */
385 le
= res
->buckets
[i
];
386 res
->buckets
[i
] = le
->next
;
388 /* Add entry to new bucket. */
389 bucket
= le
->hash
% new_size
;
391 if (new_buckets
[bucket
] != NULL
)
392 new_buckets
[bucket
]->previous
=
394 le
->next
= new_buckets
[bucket
];
396 new_buckets
[bucket
] = le
;
400 res
->buckets
= new_buckets
;
401 res
->number_buckets
= new_size
;