Configuration has been migrated to files
[zip-parser.git] / zip_parser
blob3c84f7a1f011b8974e36317e713d06eedc70f3fe
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Data::Dumper;
8 # *** Lacks the capability to parse Zip64 and encrypted .Zip files ***
10 # Check if a file has been passed as an argument or not
11 if( @ARGV == 0 ) {
12 print " No .zip file has been passed.\n";
13 exit;
16 # Slurp!
17 undef $/;
19 # Open the file and get filehandle
20 my $filename = shift;
21 open my $fh, '<', $filename or die 'can not open $filename';
22 binmode $fh;
24 # List the printables
25 my @printable_local_file_header = do 'lfh.conf';
26 my @printable_central_directory_record = do 'cdr.conf';
27 my @printable_end_central_directory_record = do 'ecdr.conf';
29 sub printable {#{{{
30 my ( $var, $header ) = @_;
31 my @printable;
33 if ( $header eq 'lfh' ) { @printable = @printable_local_file_header }
34 elsif( $header eq 'cdr' ) { @printable = @printable_central_directory_record }
35 elsif( $header eq 'ecdr' ) { @printable = @printable_end_central_directory_record }
37 map { return 1 if($var eq $_) } @printable;
39 return 0;
41 #}}}
43 sub dissect {#{{{
45 my ( $data, $header ) = @_;
47 if ( $header eq 'lfh' ) { print "\tLOCAL FILE HEADER\n" , '-' x 50, "\n\n" }
48 elsif( $header eq 'cdr' ) { print "\tCENTRAL DIRECTORY RECORD\n" , '-' x 50, "\n\n" }
49 elsif( $header eq 'ecdr' ) { print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n" }
51 if( $header eq 'lfh' || $header eq 'cdr' ) {
52 &mod_file_date_time ($data);
53 &extra_field ($data);
54 &compression_method ($data);
55 &general_purpose_bit_flag ($data);
56 &version_needed_to_extract($data);
57 } else { $data = [$data] }
59 for( @$data ) {
60 my %temp = %$_;
62 map {
63 if( &printable($_, $header) ) {
65 # Have to clean up the following, or maybe ponder of a better method
66 if( ref($temp{$_}) eq '' || ref($temp{$_}) eq 'SCALAR' ) {
67 printf "%s -> \n\t%s\n", $_, $temp{$_};
68 } elsif( ref($temp{$_}) eq 'ARRAY' ) {
69 printf "%s ->", $_;
70 map print("\n\t$_"), @{ $temp{$_} };
71 print "\n";
72 } elsif( ref($temp{$_}) eq 'HASH' ) {
73 printf "%s ->", $_;
74 my %hash = %{$temp{$_}};
76 map {
77 if( ref($hash{$_}) eq 'ARRAY' ) {
78 printf "\n%s ->", $_;
79 map print("\n\t$_"), @{ $hash{$_} };
80 } else { printf "\n%s -> %s", $_, $hash{$_} }
81 } keys %hash;
83 print "\n";
87 } keys %temp;
89 print "\n\n";
92 #}}}
94 sub compression_method {#{{{
95 my $data = shift;
97 for( my $i = 0; $i < @$data; $i++ ) {
98 exists $data->[$i]{'Compression_Method'}
99 or die " Compression_Method is not present\n";
101 my %compression_method = (
102 '0' => 'The file is stored (no compression)',
103 '1' => 'The file is Shrunk',
104 '2' => 'The file is Reduced with compression factor 1',
105 '3' => 'The file is Reduced with compression factor 2',
106 '4' => 'The file is Reduced with compression factor 3',
107 '5' => 'The file is Reduced with compression factor 4',
108 '6' => 'The file is Imploded',
109 '7' => 'Reserved for Tokenizing compression algorithm',
110 '8' => 'The file is Deflated',
111 '9' => 'Enhanced Deflating using Deflate64(tm)',
112 '10' => 'PKWARE Data Compression Library Imploding (old IBM TERSE)',
113 '11' => 'Reserved by PKWARE',
114 '12' => 'File is compressed using BZIP2 algorithm',
115 '13' => 'Reserved by PKWARE',
116 '14' => 'LZMA (EFS)',
117 '15' => 'Reserved by PKWARE',
118 '16' => 'Reserved by PKWARE',
119 '17' => 'Reserved by PKWARE',
120 '18' => 'File is compressed using IBM TERSE (new)',
121 '19' => 'IBM LZ77 z Architecture (PFS)',
122 '97' => 'WavPack compressed data',
123 '98' => 'PPMd version I, Rev 1',
126 $data->[$i]{'Compression_Method'} = $compression_method{ $data->[$i]{'Compression_Method'} };
130 #}}}
132 sub general_purpose_bit_flag {#{{{
133 my $data = shift;
135 for( my $i = 0; $i < @$data; $i++ ) {
136 exists $data->[$i]{'General_Purpose_Bit_Flag'}
137 or die " General_Purpose_Bit_Flag is not present\n";
139 my $bit0 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit0' };
140 my $bit1 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit1' };
141 my $bit2 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit2' };
142 my $bit3 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit3' };
143 my $bit4 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit4' };
144 my $bit5 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit5' };
145 my $bit6 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit6' };
146 my $bit11 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit11'};
147 my $bit12 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit12'};
148 my $bit13 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit13'};
150 my @general_purpose_bit_flag;
151 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
153 if( $data->[$i]{'Compression_Method'} eq 'The file is Imploded' ) {
155 if( $bit1 == 1 ) { push @general_purpose_bit_flag, '8K sliding dictionary' }
156 else { push @general_purpose_bit_flag, '4K sliding dictionary' }
157 if( $bit2 == 1 ) { push @general_purpose_bit_flag, '3 Shannon-Fano trees were used to encode the sliding dictionary output' }
158 else { push @general_purpose_bit_flag, '2 Shannon-Fano trees were used to encode the sliding dictionary output' }
160 } elsif( $data->[$i]{'Compression_Method'} eq 'The file is Deflated' ||
161 $data->[$i]{'Compression_Method'} eq 'Enhanced Deflating using Deflate64(tm)' ) {
163 push @general_purpose_bit_flag, 'Normal (-en) compression option was used' if $bit2 == 0 && $bit1 == 0;
164 push @general_purpose_bit_flag, 'Maximum (-exx/-ex) compression option was used' if $bit2 == 0 && $bit1 == 1;
165 push @general_purpose_bit_flag, 'Fast (-ef) compression option was used' if $bit2 == 1 && $bit1 == 0;
166 push @general_purpose_bit_flag, 'Super Fast (-es) compression option was used' if $bit2 == 1 && $bit1 == 1;
168 } elsif( $data->[$i]{'Compression_Method'} eq 'LZMA (EFS)' ) {
170 if( $bit1 == 1 ) {
171 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is used to mark the end of the compressed data stream';
172 } else {
173 push @general_purpose_bit_flag, 'End-of-stream (EOS) marker is not present and the compressed data size must be known to extract';
176 } else {
178 if( $bit3 == 1 ) {
179 $data->[$i]{'CRC-32' } = 0;
180 $data->[$i]{'Compressed_Size' } = 0;
181 $data->[$i]{'Uncompressed_Size'} = 0;
183 push @general_purpose_bit_flag, 'Data Descriptor contains CRC-32, Compressed_Size and Uncompressed_Size';
186 if( $bit4 == 1 && $data->[$i]{'Compression_Method'} eq 'The file is Deflated' ) {
187 push @general_purpose_bit_flag, 'Enhanced deflating';
188 } elsif( $bit4 == 1 && $data->[$i]{'Compression_Method'} ne 'The file is Deflated' ) {
189 die ' Enhanced deflating cannot be done on a file that is not deflated';
192 if( $bit5 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 27 ) {
193 push @general_purpose_bit_flag, 'Compressed patched data'
194 } elsif( $bit5 == 1 ) {
195 die ' Incompatible Version_Needed_To_Extract for patched compressed data';
198 if( $bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} >= 50 && $bit0 == 1 ) {
199 push @general_purpose_bit_flag, 'Strong encryption'
200 } elsif( $bit6 == 1 && $data->[$i]{'Version_Needed_To_Extract'} < 50 && $bit0 == 1 ) {
201 die ' Incompatible Version_Needed_To_Extract for strong encryption';
202 } elsif( $bit6 == 1 && $bit0 == 0 ) {
203 die ' Non-encrypted file cannot be strong encrypted';
206 push @general_purpose_bit_flag, 'Filename and comment fields for this file must be encoded using UTF-8' if $bit11 == 1;
207 push @general_purpose_bit_flag, 'Enhanced compression' if $bit12 == 1;
208 push @general_purpose_bit_flag, 'Selected data values in the Local Header are masked' if $bit13 == 1;
211 $data->[$i]{'General_Purpose_Bit_Flag'} = [ @general_purpose_bit_flag ];
214 #}}}
216 sub version_needed_to_extract {#{{{
217 my $data = shift;
219 my %version_mappings = (
220 '10' => 'Default value',
221 '11' => 'File is a volume label',
222 '20' => "File is a folder (directory)" .
223 "\n\tFile is compressed using Deflate compression" .
224 "\n\tFile is encrypted using traditional PKWARE encryption",
225 '21' => 'File is compressed using Deflate64(tm)',
226 '25' => 'File is compressed using PKWARE DCL Implode ',
227 '27' => 'File is a patch data set ',
228 '45' => 'File uses ZIP64 format extensions',
229 '46' => 'File is compressed using BZIP2 compression*',
230 '50' => "File is encrypted using DES" .
231 "\n\tFile is encrypted using 3DES" .
232 "\n\tFile is encrypted using original RC2 encryption" .
233 "\n\tFile is encrypted using RC4 encryption",
234 '51' => "File is encrypted using AES encryption" .
235 "\n\tFile is encrypted using corrected RC2 encryption",
236 '52' => 'File is encrypted using corrected RC2-64 encryption',
237 '61' => 'File is encrypted using non-OAEP key wrapping',
238 '62' => 'Central directory encryption',
239 '63' => "File is compressed using LZMA" .
240 "\n\tFile is compressed using PPMd" .
241 "\n\tFile is encrypted using Blowfish" .
242 "\n\tFile is encrypted using Twofish",
245 for( my $i = 0; $i < @$data; $i++ ) {
246 exists $data->[$i]{'Version_Needed_To_Extract'}
247 or die " Version_Needed_To_Extract is not present\n";
248 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
249 or die " Version_Needed_To_Extract has an illegal value\n";
251 $data->[$i]{'Version_Needed_To_Extract'} = $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} };
254 #}}}
256 sub extra_field {#{{{
257 my $data = shift;
258 my ( @header, @data );
260 my %header_mappings = (
261 '0001' => 'Zip64 extended information extra field',
262 '0007' => 'AV Info',
263 '0008' => 'Reserved for extended language encoding data (PFS)',
264 '0009' => 'OS/2',
265 '000a' => 'NTFS ',
266 '000c' => 'OpenVMS',
267 '000d' => 'UNIX',
268 '000e' => 'Reserved for file stream and fork descriptors',
269 '000f' => 'Patch Descriptor',
270 '0014' => 'PKCS#7 Store for X.509 Certificates',
271 '0015' => 'X.509 Certificate ID and Signature for individual file',
272 '0016' => 'X.509 Certificate ID for Central Directory',
273 '0017' => 'Strong Encryption Header',
274 '0018' => 'Record Management Controls',
275 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
276 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
277 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed',
278 '4690' => 'POSZIP 4690 (reserved) ',
279 '07c8' => 'Macintosh',
280 '2605' => 'ZipIt Macintosh',
281 '2705' => 'ZipIt Macintosh 1.3.5+',
282 '2805' => 'ZipIt Macintosh 1.3.5+',
283 '334d' => 'Info-ZIP Macintosh',
284 '4341' => 'Acorn/SparkFS ',
285 '4453' => 'Windows NT security descriptor (binary ACL)',
286 '4704' => 'VM/CMS',
287 '470f' => 'MVS',
288 '4b46' => 'FWKCS MD5 (see below)',
289 '4c41' => 'OS/2 access control list (text ACL)',
290 '4d49' => 'Info-ZIP OpenVMS',
291 '4f4c' => 'Xceed original location extra field',
292 '5356' => 'AOS/VS (ACL)',
293 '5455' => 'extended timestamp',
294 '554e' => 'Xceed unicode extra field',
295 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
296 '6375' => 'Info-ZIP Unicode Comment Extra Field',
297 '6542' => 'BeOS/BeBox',
298 '7075' => 'Info-ZIP Unicode Path Extra Field',
299 '756e' => 'ASi UNIX',
300 '7855' => 'Info-ZIP UNIX (new)',
301 'a220' => 'Microsoft Open Packaging Growth Hint',
302 'fd4a' => 'SMS/QDOS',
305 for( my $i = 0; $i < @$data; $i++ ) {
307 if( exists $data->[$i]{'Extra_Field'} ) {
308 for( my $j = 0; $j < length $data->[$i]{'Extra_Field'}; $j += 4 ) {
309 my $header = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j, 2)));
310 $header = substr($header, 2, 2) . substr($header, 0, 2);
311 $header = $header_mappings{$header} if exists $header_mappings{$header};
312 push @header, $header;
314 my $data = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j + 2, 2)));
315 push @data, substr($data, 2, 2) . substr($data, 0, 2);
317 $data->[$i]{'Extra_Field'} = {
318 Header => \@header,
319 Data => \@data,
325 #}}}
327 sub mod_file_date_time {#{{{
328 my $data = shift;
330 for( 0 .. @$data - 1 ) {
332 # Convert Last Mod File Time to Hour, Minute and Second
333 $data->[$_]{'Last_Mod_File_Time'} = pack('n', $data->[$_]{'Last_Mod_File_Time'});
334 my $convert = BitStruct('Last_Mod_File_Time',
335 BitField('Hour' , 5),
336 BitField('Minute', 6),
337 BitField('Second', 5),
339 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Time'}));
341 # Convert Last Mod File Date to Year, Month and Day
342 $data->[$_]{'Last_Mod_File_Date'} = pack('n', $data->[$_]{'Last_Mod_File_Date'});
343 $convert = BitStruct('Last Mod File Date',
344 BitField('Year' , 7),
345 BitField('Month', 4),
346 BitField('Day' , 5),
348 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Date'}));
349 $data->[$_]{'Last_Mod_File_Date'}{'Year'} = $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
353 #}}}
355 # Zip is little endian
357 my $parser_end_central_directory_record = Struct('zip',
358 Const(
359 Bytes('End_Of_Central_Dir_Signature', 4), "\x50\x4B\x05\x06"
361 Anchor('Position'),
362 ULInt16('Number_Of_This_Disk' ),
363 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory' ),
364 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk' ),
365 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory' ),
366 ULInt32('Size_Of_The_Central_Directory' ),
367 ULInt32('Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'),
368 ULInt16('.ZIP_File_Comment_Length' ),
369 Field('.ZIP_File_Comment', sub { $_->ctx->{'.ZIP_File_Comment_Length'} }),
372 seek $fh, -22, 2;
374 my $number_of_files;
375 my $stream = CreateStreamReader(File => $fh);
376 my $pecdr = $parser_end_central_directory_record->parse($stream);
378 &dissect($pecdr, 'ecdr');
379 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'};
381 my $parser_local_file_header = Array($number_of_files,
382 Struct('zip',
383 Const(
384 Bytes('Local_File_Header_Signature', 4), "\x50\x4B\x03\x04"
386 Pointer( sub { 0 }, Bytes("\x50\x4B\x03\x04", 4)),
387 ULInt16('Version_Needed_To_Extract'),
388 BitStruct('General_Purpose_Bit_Flag',
389 Padding(1),
390 Flag('Bit6' ),
391 Flag('Bit5' ),
392 Flag('Bit4' ),
393 Flag('Bit3' ),
394 Flag('Bit2' ),
395 Flag('Bit1' ),
396 Flag('Bit0' ),
397 Padding(2),
398 Flag('Bit13'),
399 Flag('Bit12'),
400 Flag('Bit11'),
401 Padding(3),
403 ULInt16('Compression_Method'),
404 ULInt16('Last_Mod_File_Time'),
405 ULInt16('Last_Mod_File_Date'),
406 ULInt32('CRC-32' ),
407 ULInt32('Compressed_Size' ),
408 ULInt32('Uncompressed_Size' ),
409 ULInt16('Filename_Length' ),
410 ULInt16('Extra_Field_Length'),
411 String ('Filename' , sub { $_->ctx->{'Filename_Length' } }),
412 Field ('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length'} }),
413 Field ('Compressed_Data', sub { $_->ctx->{'Compressed_Size' } }),
414 If(sub { $_->ctx->{'General_Purpose_Bit_Flag'}->{'Bit3'} },
415 Struct('Data_Descriptor',
416 ULInt32('CRC-32' ),
417 ULInt32('Compressed_Size' ),
418 ULInt32('Uncompressed_Size'),
421 Anchor('Position'),
425 my $parser_central_directory_record = Array($number_of_files,
426 Struct('zip',
427 Const(
428 Bytes('Central_File_Header_Signature', 4), "\x50\x4B\x01\x02"
430 Struct('Version_Made_By',
431 ULInt8('Specification'),
432 ULInt8('Compatibility'),
434 ULInt16('Version_Needed_To_Extract'),
435 BitStruct('General_Purpose_Bit_Flag',
436 Padding(1 ),
437 Flag('Bit6' ),
438 Flag('Bit5' ),
439 Flag('Bit4' ),
440 Flag('Bit3' ),
441 Flag('Bit2' ),
442 Flag('Bit1' ),
443 Flag('Bit0' ),
444 Padding(2 ),
445 Flag('Bit13'),
446 Flag('Bit12'),
447 Flag('Bit11'),
448 Padding(3 ),
450 ULInt16('Compression_Method' ),
451 ULInt16('Last_Mod_File_Time' ),
452 ULInt16('Last_Mod_File_Date' ),
453 ULInt32('CRC-32' ),
454 ULInt32('Compressed_Size' ),
455 ULInt32('Uncompressed_Size' ),
456 ULInt16('Filename_Length' ),
457 ULInt16('Extra_Field_Length' ),
458 ULInt16('File_Comment_Length' ),
459 ULInt16('Disk_Number_Start' ),
460 ULInt16('Internal_File_Attributes' ),
461 ULInt32('External_File_Attributes' ),
462 ULInt32('Relative_Offset_Of_Local_Header'),
463 String ('Filename' , sub { $_->ctx->{'Filename_Length' } }),
464 Field ('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length' } }),
465 Field ('File_Comment', sub { $_->ctx->{'File_Comment_Length'} }),
469 seek $fh, 0, 0;
470 $stream = CreateStreamReader(File => $fh);
472 &dissect( $parser_local_file_header->parse($stream), 'lfh');
473 &dissect($parser_central_directory_record->parse($stream), 'cdr');