/*****************************************************************************
 * archive.c: libarchive based stream filter
 *****************************************************************************
 * Copyright (C) 2016 VLC authors and VideoLAN
 *
 * Authors: Filip Roséen <filip@atch.se>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/
#include <vlc_common.h>
#include <vlc_plugin.h>
#include <vlc_stream.h>
#include <vlc_stream_extractor.h>
#include <vlc_dialog.h>
#include <vlc_input_item.h>

#include <archive.h>
#include <archive_entry.h>
39 #if ARCHIVE_VERSION_NUMBER < 3002000
40 typedef __LA_INT64_T la_int64_t
;
41 typedef __LA_SSIZE_T la_ssize_t
;
44 static int ExtractorOpen( vlc_object_t
* );
45 static void ExtractorClose( vlc_object_t
* );
47 static int DirectoryOpen( vlc_object_t
* );
48 static void DirectoryClose( vlc_object_t
* );
51 set_category( CAT_INPUT
)
52 set_subcategory( SUBCAT_INPUT_STREAM_FILTER
)
53 set_capability( "stream_directory", 99 )
54 set_description( N_( "libarchive based stream directory" ) )
55 set_callbacks( DirectoryOpen
, DirectoryClose
);
58 set_description( N_( "libarchive based stream extractor" ) )
59 set_capability( "stream_extractor", 99 )
60 set_callbacks( ExtractorOpen
, ExtractorClose
);

/* Short names for the types shared by every function below. */
typedef struct libarchive_callback_t libarchive_callback_t;
typedef struct private_sys_t private_sys_t;
typedef struct archive libarchive_t;
70 libarchive_t
* p_archive
;
74 struct archive_entry
* p_entry
;
80 uint8_t buffer
[ 8192 ];
81 bool b_seekable_source
;
82 bool b_seekable_archive
;
84 libarchive_callback_t
** pp_callback_data
;
85 size_t i_callback_data
;
88 struct libarchive_callback_t
{

/* ------------------------------------------------------------------------- */
96 static int libarchive_exit_cb( libarchive_t
* p_arc
, void* p_obj
)
100 libarchive_callback_t
* p_cb
= (libarchive_callback_t
*)p_obj
;
102 if( p_cb
->p_sys
->source
== p_cb
->p_source
)
103 { /* DO NOT CLOSE OUR MOTHER STREAM */
104 if( !p_cb
->p_sys
->b_dead
&& vlc_stream_Seek( p_cb
->p_source
, 0 ) )
105 return ARCHIVE_FATAL
;
107 else if( p_cb
->p_source
)
109 vlc_stream_Delete( p_cb
->p_source
);
110 p_cb
->p_source
= NULL
;
116 static int libarchive_jump_cb( libarchive_t
* p_arc
, void* p_obj_current
,
119 libarchive_callback_t
* p_current
= (libarchive_callback_t
*)p_obj_current
;
120 libarchive_callback_t
* p_next
= (libarchive_callback_t
*)p_obj_next
;
122 if( libarchive_exit_cb( p_arc
, p_current
) )
123 return ARCHIVE_FATAL
;
125 if( p_next
->p_source
== NULL
)
126 p_next
->p_source
= vlc_stream_NewURL( p_next
->p_sys
->p_obj
,
129 return p_next
->p_source
? ARCHIVE_OK
: ARCHIVE_FATAL
;
133 static la_int64_t
libarchive_skip_cb( libarchive_t
* p_arc
, void* p_obj
,
138 libarchive_callback_t
* p_cb
= (libarchive_callback_t
*)p_obj
;
140 stream_t
* p_source
= p_cb
->p_source
;
141 private_sys_t
* p_sys
= p_cb
->p_sys
;
143 /* TODO: fix b_seekable_source on libarchive_callback_t */
145 if( p_sys
->b_seekable_source
)
147 if( vlc_stream_Seek( p_source
, vlc_stream_Tell( p_source
) + i_request
) )
148 return ARCHIVE_FATAL
;
153 ssize_t i_read
= vlc_stream_Read( p_source
, NULL
, i_request
);
154 return i_read
>= 0 ? i_read
: ARCHIVE_FATAL
;
157 static la_int64_t
libarchive_seek_cb( libarchive_t
* p_arc
, void* p_obj
,
158 la_int64_t offset
, int whence
)
162 libarchive_callback_t
* p_cb
= (libarchive_callback_t
*)p_obj
;
163 stream_t
* p_source
= p_cb
->p_source
;
169 case SEEK_SET
: whence_pos
= 0; break;
170 case SEEK_CUR
: whence_pos
= vlc_stream_Tell( p_source
); break;
171 case SEEK_END
: whence_pos
= stream_Size( p_source
); break;
172 default: vlc_assert_unreachable();
176 if( whence_pos
< 0 || vlc_stream_Seek( p_source
, whence_pos
+ offset
) )
177 return ARCHIVE_FATAL
;
179 return vlc_stream_Tell( p_source
);
182 static la_ssize_t
libarchive_read_cb( libarchive_t
* p_arc
, void* p_obj
,
183 const void** pp_dst
)
187 libarchive_callback_t
* p_cb
= (libarchive_callback_t
*)p_obj
;
189 stream_t
* p_source
= p_cb
->p_source
;
190 private_sys_t
* p_sys
= p_cb
->p_sys
;
192 ssize_t i_ret
= vlc_stream_Read( p_source
, &p_sys
->buffer
,
193 sizeof( p_sys
->buffer
) );
197 archive_set_error( p_sys
->p_archive
, ARCHIVE_FATAL
,
198 "libarchive_read_cb failed = %zd", i_ret
);
200 return ARCHIVE_FATAL
;
203 *pp_dst
= &p_sys
->buffer
;

/* ------------------------------------------------------------------------- */
209 static int archive_push_resource( private_sys_t
* p_sys
,
210 stream_t
* p_source
, char const* psz_url
)
212 libarchive_callback_t
** pp_callback_data
;
213 libarchive_callback_t
* p_callback_data
;
215 /* INCREASE BUFFER SIZE */
217 pp_callback_data
= realloc( p_sys
->pp_callback_data
,
218 sizeof( *p_sys
->pp_callback_data
) * ( p_sys
->i_callback_data
+ 1 ) );
220 if( unlikely( !pp_callback_data
) )
223 /* CREATE NEW NODE */
225 p_callback_data
= malloc( sizeof( *p_callback_data
) );
227 if( unlikely( !p_callback_data
) )
230 /* INITIALIZE AND APPEND */
232 p_callback_data
->psz_url
= psz_url
? strdup( psz_url
) : NULL
;
233 p_callback_data
->p_source
= p_source
;
234 p_callback_data
->p_sys
= p_sys
;
236 if( unlikely( !p_callback_data
->psz_url
&& psz_url
) )
238 free( p_callback_data
);
242 pp_callback_data
[ p_sys
->i_callback_data
++ ] = p_callback_data
;
243 p_sys
->pp_callback_data
= pp_callback_data
;
248 free( pp_callback_data
);
252 static int archive_init( private_sys_t
* p_sys
, stream_t
* source
)
254 /* CREATE ARCHIVE HANDLE */
256 p_sys
->p_archive
= archive_read_new();
258 if( unlikely( !p_sys
->p_archive
) )
260 msg_Dbg( p_sys
->p_obj
, "unable to create libarchive handle" );
266 p_sys
->b_seekable_archive
= false;
268 if( vlc_stream_Control( source
, STREAM_CAN_SEEK
,
269 &p_sys
->b_seekable_source
) )
271 msg_Warn( p_sys
->p_obj
, "unable to query whether source stream can seek" );
272 p_sys
->b_seekable_source
= false;
275 if( p_sys
->b_seekable_source
)
277 if( archive_read_set_seek_callback( p_sys
->p_archive
,
278 libarchive_seek_cb
) )
280 msg_Err( p_sys
->p_obj
, "archive_read_set_callback failed, aborting." );
285 /* ENABLE ALL FORMATS/FILTERS */
287 archive_read_support_filter_all( p_sys
->p_archive
);
288 archive_read_support_format_all( p_sys
->p_archive
);
290 /* REGISTER CALLBACK DATA */
292 if( archive_read_set_switch_callback( p_sys
->p_archive
,
293 libarchive_jump_cb
) )
295 msg_Err( p_sys
->p_obj
, "archive_read_set_switch_callback failed, aborting." );
299 for( size_t i
= 0; i
< p_sys
->i_callback_data
; ++i
)
301 if( archive_read_append_callback_data( p_sys
->p_archive
,
302 p_sys
->pp_callback_data
[i
] ) )
308 /* OPEN THE ARCHIVE */
310 if( archive_read_open2( p_sys
->p_archive
, p_sys
->pp_callback_data
[0], NULL
,
311 libarchive_read_cb
, libarchive_skip_cb
, libarchive_exit_cb
) )
313 msg_Dbg( p_sys
->p_obj
, "libarchive: %s",
314 archive_error_string( p_sys
->p_archive
) );
322 static int archive_clean( private_sys_t
* p_sys
)
324 libarchive_t
* p_arc
= p_sys
->p_archive
;
327 archive_entry_free( p_sys
->p_entry
);
330 archive_read_free( p_arc
);
332 p_sys
->p_entry
= NULL
;
333 p_sys
->p_archive
= NULL
;
338 static int archive_seek_subentry( private_sys_t
* p_sys
, char const* psz_subentry
)
340 libarchive_t
* p_arc
= p_sys
->p_archive
;
342 struct archive_entry
* entry
;
345 while( !( archive_status
= archive_read_next_header( p_arc
, &entry
) ) )
347 char const* entry_path
= archive_entry_pathname( entry
);
349 if( strcmp( entry_path
, psz_subentry
) == 0 )
351 p_sys
->p_entry
= archive_entry_clone( entry
);
353 if( unlikely( !p_sys
->p_entry
) )
359 archive_read_data_skip( p_arc
);
362 switch( archive_status
)
365 msg_Warn( p_sys
->p_obj
,
366 "libarchive: %s", archive_error_string( p_arc
) );
371 archive_set_error( p_arc
, ARCHIVE_FATAL
,
372 "archive does not contain >>> %s <<<", psz_subentry
);
377 /* check if seeking is supported */
379 if( p_sys
->b_seekable_source
)
381 if( archive_seek_data( p_sys
->p_archive
, 0, SEEK_CUR
) >= 0 )
382 p_sys
->b_seekable_archive
= true;
388 static int archive_extractor_reset( stream_extractor_t
* p_extractor
)
390 private_sys_t
* p_sys
= p_extractor
->p_sys
;
392 if( vlc_stream_Seek( p_extractor
->source
, 0 )
393 || archive_clean( p_sys
)
394 || archive_init( p_sys
, p_extractor
->source
)
395 || archive_seek_subentry( p_sys
, p_extractor
->identifier
) )
397 p_sys
->b_dead
= true;
402 p_sys
->b_eof
= false;
403 p_sys
->b_dead
= false;

/* ------------------------------------------------------------------------- */
409 static private_sys_t
* setup( vlc_object_t
* obj
, stream_t
* source
)
411 private_sys_t
* p_sys
= calloc( 1, sizeof( *p_sys
) );
412 char* psz_files
= var_InheritString( obj
, "concat-list" );
414 if( unlikely( !p_sys
) )
417 if( archive_push_resource( p_sys
, source
, NULL
) )
423 * path
= strtok_r( psz_files
, ",", &state
);
424 path
; path
= strtok_r( NULL
, ",", &state
) )
426 if( path
== psz_files
)
429 if( archive_push_resource( p_sys
, NULL
, path
) )
436 p_sys
->source
= source
;
447 static int probe( stream_t
* source
)
453 char const * p_bytes
;
454 } const magicbytes
[] = {
455 /* keep heaviest at top */
456 { 257, 5, "ustar" }, //TAR
457 { 0, 7, "Rar!\x1A\x07" }, //RAR
458 { 0, 6, "7z\xBC\xAF\x27\x1C" }, //7z
459 { 0, 4, "xar!" }, //XAR
460 { 0, 4, "PK\x03\x04" }, //ZIP
461 { 0, 4, "PK\x05\x06" }, //ZIP
462 { 0, 4, "PK\x07\x08" }, //ZIP
463 { 2, 3, "-lh" }, //LHA/LHZ
464 { 0, 3, "\x1f\x8b\x08" }, // Gzip
465 { 0, 3, "PAX" }, //PAX
466 { 0, 6, "070707" }, //CPIO
467 { 0, 6, "070701" }, //CPIO
468 { 0, 6, "070702" }, //CPIO
469 { 0, 4, "MSCH" }, //CAB
472 const uint8_t *p_peek
;
474 int i_peek
= vlc_stream_Peek( source
, &p_peek
,
475 magicbytes
[0].i_offset
+ magicbytes
[0].i_length
);
477 for(unsigned i
=0; i
< ARRAY_SIZE( magicbytes
); i
++)
479 if (i_peek
< magicbytes
[i
].i_offset
+ magicbytes
[i
].i_length
)
482 if ( !memcmp(p_peek
+ magicbytes
[i
].i_offset
,
483 magicbytes
[i
].p_bytes
, magicbytes
[i
].i_length
) )

/* ------------------------------------------------------------------------- */
492 static int Control( stream_extractor_t
* p_extractor
, int i_query
, va_list args
)
494 private_sys_t
* p_sys
= p_extractor
->p_sys
;
498 case STREAM_CAN_FASTSEEK
:
499 *va_arg( args
, bool* ) = false;
502 case STREAM_CAN_SEEK
:
503 *va_arg( args
, bool* ) = p_sys
->b_seekable_source
;
506 case STREAM_GET_SIZE
:
507 if( p_sys
->p_entry
== NULL
)
510 if( !archive_entry_size_is_set( p_sys
->p_entry
) )
513 *va_arg( args
, uint64_t* ) = archive_entry_size( p_sys
->p_entry
);
517 return vlc_stream_vaControl( p_extractor
->source
, i_query
, args
);
523 static int ReadDir( stream_directory_t
* p_directory
, input_item_node_t
* p_node
)
525 private_sys_t
* p_sys
= p_directory
->p_sys
;
526 libarchive_t
* p_arc
= p_sys
->p_archive
;
528 struct vlc_readdir_helper rdh
;
529 vlc_readdir_helper_init( &rdh
, p_directory
, p_node
);
530 struct archive_entry
* entry
;
533 while( !( archive_status
= archive_read_next_header( p_arc
, &entry
) ) )
535 if( archive_entry_filetype( entry
) == AE_IFDIR
)
538 char const* path
= archive_entry_pathname( entry
);
540 if( unlikely( !path
) )
543 char* mrl
= vlc_stream_extractor_CreateMRL( p_directory
, path
);
545 if( unlikely( !mrl
) )
548 if( vlc_readdir_helper_additem( &rdh
, mrl
, path
, NULL
, ITEM_TYPE_FILE
,
556 if( archive_read_data_skip( p_arc
) )
560 vlc_readdir_helper_finish( &rdh
, archive_status
== ARCHIVE_EOF
);
561 return archive_status
== ARCHIVE_EOF
? VLC_SUCCESS
: VLC_EGENERIC
;
564 static ssize_t
Read( stream_extractor_t
*p_extractor
, void* p_data
, size_t i_size
)
566 char dummy_buffer
[ 8192 ];
568 private_sys_t
* p_sys
= p_extractor
->p_sys
;
569 libarchive_t
* p_arc
= p_sys
->p_archive
;
572 if( p_sys
->b_dead
|| p_sys
->p_entry
== NULL
)
578 i_ret
= archive_read_data( p_arc
,
579 p_data
? p_data
: dummy_buffer
,
580 p_data
? i_size
: __MIN( i_size
, sizeof( dummy_buffer
) ) );
586 msg_Dbg( p_extractor
, "libarchive: %s", archive_error_string( p_arc
) );
590 msg_Warn( p_extractor
, "libarchive: %s", archive_error_string( p_arc
) );
594 msg_Err( p_extractor
, "libarchive: %s", archive_error_string( p_arc
) );
598 p_sys
->i_offset
+= i_ret
;
602 p_sys
->b_dead
= true;
609 static int archive_skip_decompressed( stream_extractor_t
* p_extractor
, uint64_t i_skip
)
613 ssize_t i_read
= Read( p_extractor
, NULL
, i_skip
);
624 static int Seek( stream_extractor_t
* p_extractor
, uint64_t i_req
)
626 private_sys_t
* p_sys
= p_extractor
->p_sys
;
628 if( !p_sys
->p_entry
|| !p_sys
->b_seekable_source
)
631 if( archive_entry_size_is_set( p_sys
->p_entry
) &&
632 (uint64_t)archive_entry_size( p_sys
->p_entry
) <= i_req
)
638 p_sys
->b_eof
= false;
640 if( !p_sys
->b_seekable_archive
|| p_sys
->b_dead
641 || archive_seek_data( p_sys
->p_archive
, i_req
, SEEK_SET
) < 0 )
643 msg_Dbg( p_extractor
,
644 "intrinsic seek failed: '%s' (falling back to dumb seek)",
645 archive_error_string( p_sys
->p_archive
) );
647 uint64_t i_skip
= i_req
- p_sys
->i_offset
;
649 /* RECREATE LIBARCHIVE HANDLE IF WE ARE SEEKING BACKWARDS */
651 if( i_req
< p_sys
->i_offset
)
653 if( archive_extractor_reset( p_extractor
) )
655 msg_Err( p_extractor
, "unable to reset libarchive handle" );
662 if( archive_skip_decompressed( p_extractor
, i_skip
) )
663 msg_Dbg( p_extractor
, "failed to skip to seek position" );
666 p_sys
->i_offset
= i_req
;
671 static void CommonClose( private_sys_t
* p_sys
)
673 p_sys
->b_dead
= true;
674 archive_clean( p_sys
);
676 for( size_t i
= 0; i
< p_sys
->i_callback_data
; ++i
)
678 free( p_sys
->pp_callback_data
[i
]->psz_url
);
679 free( p_sys
->pp_callback_data
[i
] );
682 free( p_sys
->pp_callback_data
);
686 static void DirectoryClose( vlc_object_t
* p_obj
)
688 stream_directory_t
* p_directory
= (void*)p_obj
;
689 return CommonClose( p_directory
->p_sys
);
692 static void ExtractorClose( vlc_object_t
* p_obj
)
694 stream_extractor_t
* p_extractor
= (void*)p_obj
;
695 return CommonClose( p_extractor
->p_sys
);
698 static private_sys_t
* CommonOpen( vlc_object_t
* p_obj
, stream_t
* source
)
700 if( probe( source
) )
703 private_sys_t
* p_sys
= setup( p_obj
, source
);
708 if( archive_init( p_sys
, source
) )
710 CommonClose( p_sys
);
717 static int DirectoryOpen( vlc_object_t
* p_obj
)
719 stream_directory_t
* p_directory
= (void*)p_obj
;
720 private_sys_t
* p_sys
= CommonOpen( p_obj
, p_directory
->source
);
725 p_directory
->p_sys
= p_sys
;
726 p_directory
->pf_readdir
= ReadDir
;
731 static int ExtractorOpen( vlc_object_t
* p_obj
)
733 stream_extractor_t
* p_extractor
= (void*)p_obj
;
734 private_sys_t
* p_sys
= CommonOpen( p_obj
, p_extractor
->source
);
739 if( archive_seek_subentry( p_sys
, p_extractor
->identifier
) )
741 CommonClose( p_sys
);
745 p_extractor
->p_sys
= p_sys
;
746 p_extractor
->pf_read
= Read
;
747 p_extractor
->pf_control
= Control
;
748 p_extractor
->pf_seek
= Seek
;