Updated LOCAL FILE HEADER structure
[zip-parser.git] / zip_parser
blob1135f5b51b85c716a58f3aa60818e473c99ae25b
1 #!/usr/bin/perl
3 use strict;
4 use warnings;
5 use Data::ParseBinary;
6 use Data::Dumper;
8 # Check if a file has been passed as an argument or not
9 if( @ARGV == 0 ) {
10 print " No .zip file has been passed.\n";
11 exit;
14 undef $/;
16 my $filename = shift;
17 open my $fh, '<', $filename or die 'can not open $filename';
18 binmode $fh;
21 my @printable_local_file_header = qw(
22 Filename Version_Needed_To_Extract General_Purpose_Bit_Flag Compression_Method
25 my @printable_central_directory_record = qw(
26 Filename General_Purpose_Bit_Flag
29 my @printable_end_central_directory_record = qw(
30 Total_Number_Of_Entries_In_The_Central_Directory
33 sub printable {#{{{
34 my ( $var, $header ) = @_;
35 my @printable;
37 if( $header eq 'lfh' ) {
38 @printable = @printable_local_file_header;
39 } elsif( $header eq 'cdr' ) {
40 @printable = @printable_central_directory_record;
41 } elsif( $header eq 'ecdr' ) {
42 @printable = @printable_end_central_directory_record;
45 for( @printable ) {
46 return 1 if($var eq $_);
48 return 0;
50 #}}}
52 sub dissect {#{{{
54 my ( $data, $header ) = @_;
56 if ( $header eq 'lfh' ) { print "\tLOCAL FILE HEADER\n" , '-' x 50, "\n\n" }
57 elsif( $header eq 'cdr' ) { print "\tCENTRAL DIRECTORY RECORD\n" , '-' x 50, "\n\n" }
58 elsif( $header eq 'ecdr' ) { print "\tEND CENTRAL DIRECTORY RECORD\n", '-' x 50, "\n\n" }
60 if( $header eq 'lfh' || $header eq 'cdr' ) {
62 &mod_file_date_time ($data);
63 &extra_field ($data);
64 &compression_method ($data);
65 &version_needed_to_extract($data);
66 &general_purpose_bit_flag ($data);
68 } else { $data = [$data] }
70 for( @$data ) {
72 my %temp = %$_;
74 map {
76 if( &printable($_, $header) ) {
78 if( ref($temp{$_}) eq '' || ref($temp{$_}) eq 'SCALAR' ) {
80 printf "%s -> %s\n", $_, $temp{$_}
82 } elsif( ref($temp{$_}) eq 'ARRAY' ) {
84 printf "%s ->", $_;
85 map print("\n\t$_"), @{ $temp{$_} };
86 print "\n";
88 } elsif( ref($temp{$_} eq 'HASH') ) {
90 printf "%s ->", $_;
92 my %hash = %{$temp{$_}};
94 map {
96 if( ref($hash{$_}) eq 'ARRAY' ) {
98 printf "\n%s ->", $_;
99 map print("\n\t$_"), @{ $hash{$_} };
101 } else { printf "\n%s -> %s", $_, $hash{$_} }
103 } keys %hash;
105 print "\n";
110 } keys %temp;
112 print "\n\n";
115 #}}}
117 sub compression_method {#{{{
118 my $data = shift;
120 for( my $i = 0; $i < @$data; $i++ ) {
122 exists $data->[$i]{'Compression_Method'}
123 or die " Compression_Method is not present\n";
128 #}}}
130 sub general_purpose_bit_flag {#{{{
132 my $data = shift;
134 for( my $i = 0; $i < @$data; $i++ ) {
136 exists $data->[$i]{'General_Purpose_Bit_Flag'}
137 or die " General_Purpose_Bit_Flag is not present\n";
139 my $bit0 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit0' };
140 my $bit1 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit1' };
141 my $bit2 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit2' };
142 my $bit3 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit3' };
143 my $bit4 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit4' };
144 my $bit5 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit5' };
145 my $bit6 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit6' };
146 my $bit11 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit11'};
147 my $bit12 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit12'};
148 my $bit13 = ${$data->[$i]{'General_Purpose_Bit_Flag'}}{'Bit13'};
150 my @general_purpose_bit_flag;
152 push @general_purpose_bit_flag, 'File is encrypted' if $bit0 == 1;
153 push @general_purpose_bit_flag, 'Normal (-en) compression option was used' if $bit2 == 0 && $bit1 == 0;
154 push @general_purpose_bit_flag, 'Maximum (-exx/-ex) compression option was used' if $bit2 == 0 && $bit1 == 1;
155 push @general_purpose_bit_flag, 'Fast (-ef) compression option was used' if $bit2 == 1 && $bit1 == 0;
156 push @general_purpose_bit_flag, 'Super Fast (-es) compression option was used' if $bit2 == 1 && $bit1 == 1;
157 push @general_purpose_bit_flag, 'Compressed patched data' if $bit5 == 1;
158 push @general_purpose_bit_flag, 'Enhanced compression' if $bit5 == 1;
160 # Add a &check_version subroutine
161 if( $bit6 == 1 ) {
163 if( ref($data->[$i]{'Version_Needed_To_Extract'}) eq 'ARRAY' ) {
165 my $flag = 0;
167 map {
169 if( $_ eq 'File is encrypted using DES' ||
170 $_ eq 'File is encrypted using 3DES' ||
171 $_ eq 'File is encrypted using original RC2 encryption' ||
172 $_ eq 'File is encrypted using RC4 encryption' ||
173 $_ eq 'File is encrypted using AES encryption' ||
174 $_ eq 'File is encrypted using corrected RC2 encryption' ||
175 $_ eq 'File is encrypted using corrected RC2-64 encryption' ||
176 $_ eq 'File is encrypted using non-OAEP key wrapping' ||
177 $_ eq 'Central directory encryption' ||
178 $_ eq 'File is compressed using LZMA' ||
179 $_ eq 'File is compressed using PPMd' ||
180 $_ eq 'File is encrypted using Blowfish' ||
181 $_ eq 'File is encrypted using Twofish' ) {
183 push @general_purpose_bit_flag, 'Strong encryption';
184 $flag = 1;
188 } @{ $data->[$i]{'Version_Needed_To_Extract'} };
190 if( $flag == 0 ) { die " Bit 6 of General Purpose Bit Flag is incorrect\n" }
196 push @general_purpose_bit_flag, 'Filename and Comment fields are UTF-8 encoded' if $bit11 == 1;
197 push @general_purpose_bit_flag, 'Data values in the Local Header are masked' if $bit13 == 1;
199 $data->[$i]{'General_Purpose_Bit_Flag'} = [ @general_purpose_bit_flag ];
204 #}}}
206 sub version_needed_to_extract {#{{{
207 my $data = shift;
209 my %version_mappings = (
211 '10' => 'Default value',
212 '11' => 'File is a volume label',
214 '20' => "\n\tFile is a folder (directory)" .
215 "\n\tFile is compressed using Deflate compression" .
216 "\n\tFile is encrypted using traditional PKWARE encryption",
218 '21' => 'File is compressed using Deflate64(tm)',
219 '25' => 'File is compressed using PKWARE DCL Implode ',
220 '27' => 'File is a patch data set ',
221 '45' => 'File uses ZIP64 format extensions',
222 '46' => 'File is compressed using BZIP2 compression*',
224 '50' => "\n\tFile is encrypted using DES" .
225 "\n\tFile is encrypted using 3DES" .
226 "\n\tFile is encrypted using original RC2 encryption" .
227 "\n\tFile is encrypted using RC4 encryption",
229 '51' => "\n\tFile is encrypted using AES encryption" .
230 "\n\tFile is encrypted using corrected RC2 encryption",
232 '52' => 'File is encrypted using corrected RC2-64 encryption',
233 '61' => 'File is encrypted using non-OAEP key wrapping',
234 '62' => 'Central directory encryption',
236 '63' => "\n\tFile is compressed using LZMA" .
237 "\n\tFile is compressed using PPMd" .
238 "\n\tFile is encrypted using Blowfish" .
239 "\n\tFile is encrypted using Twofish",
243 for( my $i = 0; $i < @$data; $i++ ) {
245 exists $data->[$i]{'Version_Needed_To_Extract'}
246 or die " Version_Needed_To_Extract is not present\n";
247 exists $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} }
248 or die " Version_Needed_To_Extract has an illegal value\n";
250 $data->[$i]{'Version_Needed_To_Extract'} =
251 $version_mappings{ $data->[$i]{'Version_Needed_To_Extract'} },
257 #}}}
259 sub extra_field {#{{{
260 my $data = shift;
261 my ( @header, @data );
262 my %header_mappings = (
263 '0001' => 'Zip64 extended information extra field',
264 '0007' => 'AV Info',
265 '0008' => 'Reserved for extended language encoding data (PFS)',
266 '0009' => 'OS/2',
267 '000a' => 'NTFS ',
268 '000c' => 'OpenVMS',
269 '000d' => 'UNIX',
270 '000e' => 'Reserved for file stream and fork descriptors',
271 '000f' => 'Patch Descriptor',
272 '0014' => 'PKCS#7 Store for X.509 Certificates',
273 '0015' => 'X.509 Certificate ID and Signature for individual file',
274 '0016' => 'X.509 Certificate ID for Central Directory',
275 '0017' => 'Strong Encryption Header',
276 '0018' => 'Record Management Controls',
277 '0019' => 'PKCS#7 Encryption Recipient Certificate List',
278 '0065' => 'IBM S/390 (Z390), AS/400 (I400) attributes - uncompressed',
279 '0066' => 'Reserved for IBM S/390 (Z390), AS/400 (I400) attributes - compressed',
280 '4690' => 'POSZIP 4690 (reserved) ',
281 '07c8' => 'Macintosh',
282 '2605' => 'ZipIt Macintosh',
283 '2705' => 'ZipIt Macintosh 1.3.5+',
284 '2805' => 'ZipIt Macintosh 1.3.5+',
285 '334d' => 'Info-ZIP Macintosh',
286 '4341' => 'Acorn/SparkFS ',
287 '4453' => 'Windows NT security descriptor (binary ACL)',
288 '4704' => 'VM/CMS',
289 '470f' => 'MVS',
290 '4b46' => 'FWKCS MD5 (see below)',
291 '4c41' => 'OS/2 access control list (text ACL)',
292 '4d49' => 'Info-ZIP OpenVMS',
293 '4f4c' => 'Xceed original location extra field',
294 '5356' => 'AOS/VS (ACL)',
295 '5455' => 'extended timestamp',
296 '554e' => 'Xceed unicode extra field',
297 '5855' => 'Info-ZIP UNIX (original, also OS/2, NT, etc)',
298 '6375' => 'Info-ZIP Unicode Comment Extra Field',
299 '6542' => 'BeOS/BeBox',
300 '7075' => 'Info-ZIP Unicode Path Extra Field',
301 '756e' => 'ASi UNIX',
302 '7855' => 'Info-ZIP UNIX (new)',
303 'a220' => 'Microsoft Open Packaging Growth Hint',
304 'fd4a' => 'SMS/QDOS',
307 for( my $i = 0; $i < @$data; $i++ ) {
309 if( exists $data->[$i]{'Extra_Field'} ) {
311 for( my $j = 0; $j < length $data->[$i]{'Extra_Field'}; $j += 4 ) {
313 my $header = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j, 2)));
314 $header = substr($header, 2, 2) . substr($header, 0, 2);
315 $header = $header_mappings{$header} if exists $header_mappings{$header};
316 push @header, $header;
318 my $data = unpack("H4", pack('Z*', substr($data->[$i]{'Extra_Field'}, $j + 2, 2)));
319 push @data, substr($data, 2, 2) . substr($data, 0, 2);
323 $data->[$i]{'Extra_Field'} = {
324 Header => \@header,
325 Data => \@data,
330 #}}}
332 sub mod_file_date_time {#{{{
333 my $data = shift;
334 for( 0 .. @$data - 1 ) {
335 # Convert Last Mod File Time to Hour, Minute and Second
336 $data->[$_]{'Last_Mod_File_Time'} = pack('n', $data->[$_]{'Last_Mod_File_Time'});
337 my $convert = BitStruct('Last_Mod_File_Time',
338 BitField('Hour' , 5),
339 BitField('Minute', 6),
340 BitField('Second', 5),
342 $data->[$_]{'Last_Mod_File_Time'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Time'}));
343 # Convert Last Mod File Date to Year, Month and Day
344 $data->[$_]{'Last_Mod_File_Date'} = pack('n', $data->[$_]{'Last_Mod_File_Date'});
345 $convert = BitStruct('Last Mod File Date',
346 BitField('Year' , 7),
347 BitField('Month', 4),
348 BitField('Day' , 5),
350 $data->[$_]{'Last_Mod_File_Date'} = $convert->parse(CreateStreamReader($data->[$_]{'Last_Mod_File_Date'}));
351 $data->[$_]{'Last_Mod_File_Date'}{'Year'} = $data->[$_]{'Last_Mod_File_Date'}{'Year'} + 1980;
354 #}}}
356 # Zip is little endian
358 my $parser_end_central_directory_record = Struct('zip',
360 Const(
361 Bytes('End_Of_Central_Dir_Signature', 4), "\x50\x4B\x05\x06"
364 Anchor('Position'),
366 ULInt16('Number_Of_This_Disk' ),
367 ULInt16('Number_Of_The_Disk_With_The_Start_Of_The_Central_Directory' ),
368 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory_On_This_Disk' ),
369 ULInt16('Total_Number_Of_Entries_In_The_Central_Directory' ),
371 ULInt32('Size_Of_The_Central_Directory' ),
372 ULInt32('Offset_Of_Start_Of_Central_Directory_With_Respect_To_The_Starting_Disk_Number'),
374 ULInt16('.ZIP_File_Comment_Length' ),
376 Field('.ZIP_File_Comment', sub { $_->ctx->{'.ZIP_File_Comment_Length'} }),
380 seek $fh, -22, 2;
382 my $number_of_files;
383 my $stream = CreateStreamReader(File => $fh);
384 my $pecdr = $parser_end_central_directory_record->parse($stream);
386 &dissect($pecdr, 'ecdr');
387 $number_of_files = $pecdr->{'Total_Number_Of_Entries_In_The_Central_Directory'},
389 my $parser_local_file_header = Array($number_of_files,
391 Struct('zip',
393 Const(
394 Bytes('Local_File_Header_Signature', 4), "\x50\x4B\x03\x04"
397 Pointer( sub { 0 }, Bytes("\x50\x4B\x03\x04", 4)),
399 ULInt16('Version_Needed_To_Extract'),
401 BitStruct('General_Purpose_Bit_Flag',
403 Padding(1),
405 Flag('Bit6' ),
406 Flag('Bit5' ),
407 Flag('Bit4' ),
408 Flag('Bit3' ),
409 Flag('Bit2' ),
410 Flag('Bit1' ),
411 Flag('Bit0' ),
413 Padding(2),
415 Flag('Bit13'),
416 Flag('Bit12'),
417 Flag('Bit11'),
419 Padding(3),
423 ULInt16('Compression_Method'),
424 ULInt16('Last_Mod_File_Time'),
425 ULInt16('Last_Mod_File_Date'),
427 ULInt32('CRC-32' ),
428 ULInt32('Compressed_Size' ),
429 ULInt32('Uncompressed_Size' ),
431 ULInt16('Filename_Length' ),
432 ULInt16('Extra_Field_Length'),
434 String('Filename' , sub { $_->ctx->{'Filename_Length' } }),
436 Field('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length'} }),
437 Field('Compressed_Data', sub { $_->ctx->{'Compressed_Size' } }),
439 Anchor('Position'),
445 my $parser_central_directory_record = Array($number_of_files,
447 Struct('zip',
449 Const(
450 Bytes('Central_File_Header_Signature', 4), "\x50\x4B\x01\x02"
453 Struct('Version_Made_By',
454 ULInt8('Specification'),
455 ULInt8('Compatibility'),
458 ULInt16('Version_Needed_To_Extract'),
460 BitStruct('General_Purpose_Bit_Flag',
462 Padding(1),
464 Flag('Bit6' ),
465 Flag('Bit5' ),
466 Flag('Bit4' ),
467 Flag('Bit3' ),
468 Flag('Bit2' ),
469 Flag('Bit1' ),
470 Flag('Bit0' ),
472 Padding(2),
474 Flag('Bit13'),
475 Flag('Bit12'),
476 Flag('Bit11'),
478 Padding(3),
482 ULInt16('Compression_Method' ),
483 ULInt16('Last_Mod_File_Time' ),
484 ULInt16('Last_Mod_File_Date' ),
486 ULInt32('CRC-32' ),
487 ULInt32('Compressed_Size' ),
488 ULInt32('Uncompressed_Size' ),
490 ULInt16('Filename_Length' ),
491 ULInt16('Extra_Field_Length' ),
492 ULInt16('File_Comment_Length' ),
493 ULInt16('Disk_Number_Start' ),
494 ULInt16('Internal_File_Attributes' ),
496 ULInt32('External_File_Attributes' ),
497 ULInt32('Relative_Offset_Of_Local_Header'),
499 String('Filename' , sub { $_->ctx->{'Filename_Length' } }),
501 Field('Extra_Field' , sub { $_->ctx->{'Extra_Field_Length' } }),
502 Field('File_Comment', sub { $_->ctx->{'File_Comment_Length'} }),
508 seek $fh, 0, 0;
509 $stream = CreateStreamReader(File => $fh);
510 &dissect($parser_local_file_header->parse($stream), 'lfh');
511 #print Dumper $parser_local_file_header->parse($stream);
512 &dissect($parser_central_directory_record->parse($stream), 'cdr');