Added named arguments support
[archive-zip.git] / lib / Archive / Zip.pm
blob9221bdf62bd821e26b6b54f57967918b00962d84
1 package Archive::Zip;
3 use strict;
4 BEGIN {
5 require 5.003_96;
7 use UNIVERSAL ();
8 use Carp ();
9 use IO::File ();
10 use IO::Seekable ();
11 use Compress::Raw::Zlib ();
12 use File::Spec ();
13 use File::Temp ();
14 use FileHandle ();
16 use vars qw( $VERSION @ISA );
17 BEGIN {
18 $VERSION = '1.28';
20 require Exporter;
21 @ISA = qw( Exporter );
24 use vars qw( $ChunkSize $ErrorHandler );
25 BEGIN {
26 # This is the size we'll try to read, write, and (de)compress.
27 # You could set it to something different if you had lots of memory
28 # and needed more speed.
29 $ChunkSize ||= 32768;
31 $ErrorHandler = \&Carp::carp;
34 # BEGIN block is necessary here so that other modules can use the constants.
35 use vars qw( @EXPORT_OK %EXPORT_TAGS );
36 BEGIN {
37 @EXPORT_OK = ('computeCRC32');
38 %EXPORT_TAGS = (
39 CONSTANTS => [ qw(
40 FA_MSDOS
41 FA_UNIX
42 GPBF_ENCRYPTED_MASK
43 GPBF_DEFLATING_COMPRESSION_MASK
44 GPBF_HAS_DATA_DESCRIPTOR_MASK
45 COMPRESSION_STORED
46 COMPRESSION_DEFLATED
47 COMPRESSION_LEVEL_NONE
48 COMPRESSION_LEVEL_DEFAULT
49 COMPRESSION_LEVEL_FASTEST
50 COMPRESSION_LEVEL_BEST_COMPRESSION
51 IFA_TEXT_FILE_MASK
52 IFA_TEXT_FILE
53 IFA_BINARY_FILE
54 ) ],
56 MISC_CONSTANTS => [ qw(
57 FA_AMIGA
58 FA_VAX_VMS
59 FA_VM_CMS
60 FA_ATARI_ST
61 FA_OS2_HPFS
62 FA_MACINTOSH
63 FA_Z_SYSTEM
64 FA_CPM
65 FA_TOPS20
66 FA_WINDOWS_NTFS
67 FA_QDOS
68 FA_ACORN
69 FA_VFAT
70 FA_MVS
71 FA_BEOS
72 FA_TANDEM
73 FA_THEOS
74 GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK
75 GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK
76 GPBF_IS_COMPRESSED_PATCHED_DATA_MASK
77 COMPRESSION_SHRUNK
78 DEFLATING_COMPRESSION_NORMAL
79 DEFLATING_COMPRESSION_MAXIMUM
80 DEFLATING_COMPRESSION_FAST
81 DEFLATING_COMPRESSION_SUPER_FAST
82 COMPRESSION_REDUCED_1
83 COMPRESSION_REDUCED_2
84 COMPRESSION_REDUCED_3
85 COMPRESSION_REDUCED_4
86 COMPRESSION_IMPLODED
87 COMPRESSION_TOKENIZED
88 COMPRESSION_DEFLATED_ENHANCED
89 COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED
90 ) ],
92 ERROR_CODES => [ qw(
93 AZ_OK
94 AZ_STREAM_END
95 AZ_ERROR
96 AZ_FORMAT_ERROR
97 AZ_IO_ERROR
98 ) ],
100 # For Internal Use Only
101 PKZIP_CONSTANTS => [ qw(
102 SIGNATURE_FORMAT
103 SIGNATURE_LENGTH
104 LOCAL_FILE_HEADER_SIGNATURE
105 LOCAL_FILE_HEADER_FORMAT
106 LOCAL_FILE_HEADER_LENGTH
107 CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE
108 DATA_DESCRIPTOR_FORMAT
109 DATA_DESCRIPTOR_LENGTH
110 DATA_DESCRIPTOR_SIGNATURE
111 DATA_DESCRIPTOR_FORMAT_NO_SIG
112 DATA_DESCRIPTOR_LENGTH_NO_SIG
113 CENTRAL_DIRECTORY_FILE_HEADER_FORMAT
114 CENTRAL_DIRECTORY_FILE_HEADER_LENGTH
115 END_OF_CENTRAL_DIRECTORY_SIGNATURE
116 END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING
117 END_OF_CENTRAL_DIRECTORY_FORMAT
118 END_OF_CENTRAL_DIRECTORY_LENGTH
119 ) ],
121 # For Internal Use Only
122 UTILITY_METHODS => [ qw(
123 _error
124 _printError
125 _ioError
126 _formatError
127 _subclassResponsibility
128 _binmode
129 _isSeekable
130 _newFileHandle
131 _readSignature
132 _asZipDirName
133 ) ],
136 # Add all the constant names and error code names to @EXPORT_OK
137 Exporter::export_ok_tags( qw(
138 CONSTANTS
139 ERROR_CODES
140 PKZIP_CONSTANTS
141 UTILITY_METHODS
142 MISC_CONSTANTS
143 ) );
147 # Error codes
148 use constant AZ_OK => 0;
149 use constant AZ_STREAM_END => 1;
150 use constant AZ_ERROR => 2;
151 use constant AZ_FORMAT_ERROR => 3;
152 use constant AZ_IO_ERROR => 4;
154 # File types
155 # Values of Archive::Zip::Member->fileAttributeFormat()
157 use constant FA_MSDOS => 0;
158 use constant FA_AMIGA => 1;
159 use constant FA_VAX_VMS => 2;
160 use constant FA_UNIX => 3;
161 use constant FA_VM_CMS => 4;
162 use constant FA_ATARI_ST => 5;
163 use constant FA_OS2_HPFS => 6;
164 use constant FA_MACINTOSH => 7;
165 use constant FA_Z_SYSTEM => 8;
166 use constant FA_CPM => 9;
167 use constant FA_TOPS20 => 10;
168 use constant FA_WINDOWS_NTFS => 11;
169 use constant FA_QDOS => 12;
170 use constant FA_ACORN => 13;
171 use constant FA_VFAT => 14;
172 use constant FA_MVS => 15;
173 use constant FA_BEOS => 16;
174 use constant FA_TANDEM => 17;
175 use constant FA_THEOS => 18;
177 # general-purpose bit flag masks
178 # Found in Archive::Zip::Member->bitFlag()
180 use constant GPBF_ENCRYPTED_MASK => 1 << 0;
181 use constant GPBF_DEFLATING_COMPRESSION_MASK => 3 << 1;
182 use constant GPBF_HAS_DATA_DESCRIPTOR_MASK => 1 << 3;
184 # deflating compression types, if compressionMethod == COMPRESSION_DEFLATED
185 # ( Archive::Zip::Member->bitFlag() & GPBF_DEFLATING_COMPRESSION_MASK )
187 use constant DEFLATING_COMPRESSION_NORMAL => 0 << 1;
188 use constant DEFLATING_COMPRESSION_MAXIMUM => 1 << 1;
189 use constant DEFLATING_COMPRESSION_FAST => 2 << 1;
190 use constant DEFLATING_COMPRESSION_SUPER_FAST => 3 << 1;
192 # compression method
194 # these two are the only ones supported in this module
195 use constant COMPRESSION_STORED => 0; # file is stored (no compression)
196 use constant COMPRESSION_DEFLATED => 8; # file is Deflated
197 use constant COMPRESSION_LEVEL_NONE => 0;
198 use constant COMPRESSION_LEVEL_DEFAULT => -1;
199 use constant COMPRESSION_LEVEL_FASTEST => 1;
200 use constant COMPRESSION_LEVEL_BEST_COMPRESSION => 9;
202 # internal file attribute bits
203 # Found in Archive::Zip::Member::internalFileAttributes()
205 use constant IFA_TEXT_FILE_MASK => 1;
206 use constant IFA_TEXT_FILE => 1;
207 use constant IFA_BINARY_FILE => 0;
209 # PKZIP file format miscellaneous constants (for internal use only)
210 use constant SIGNATURE_FORMAT => "V";
211 use constant SIGNATURE_LENGTH => 4;
213 # these lengths are without the signature.
214 use constant LOCAL_FILE_HEADER_SIGNATURE => 0x04034b50;
215 use constant LOCAL_FILE_HEADER_FORMAT => "v3 V4 v2";
216 use constant LOCAL_FILE_HEADER_LENGTH => 26;
218 # PKZIP docs don't mention the signature, but Info-Zip writes it.
219 use constant DATA_DESCRIPTOR_SIGNATURE => 0x08074b50;
220 use constant DATA_DESCRIPTOR_FORMAT => "V3";
221 use constant DATA_DESCRIPTOR_LENGTH => 12;
223 # but the signature is apparently optional.
224 use constant DATA_DESCRIPTOR_FORMAT_NO_SIG => "V2";
225 use constant DATA_DESCRIPTOR_LENGTH_NO_SIG => 8;
227 use constant CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE => 0x02014b50;
228 use constant CENTRAL_DIRECTORY_FILE_HEADER_FORMAT => "C2 v3 V4 v5 V2";
229 use constant CENTRAL_DIRECTORY_FILE_HEADER_LENGTH => 42;
231 use constant END_OF_CENTRAL_DIRECTORY_SIGNATURE => 0x06054b50;
232 use constant END_OF_CENTRAL_DIRECTORY_SIGNATURE_STRING =>
233 pack( "V", END_OF_CENTRAL_DIRECTORY_SIGNATURE );
234 use constant END_OF_CENTRAL_DIRECTORY_FORMAT => "v4 V2 v";
235 use constant END_OF_CENTRAL_DIRECTORY_LENGTH => 18;
237 use constant GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK => 1 << 1;
238 use constant GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK => 1 << 2;
239 use constant GPBF_IS_COMPRESSED_PATCHED_DATA_MASK => 1 << 5;
241 # the rest of these are not supported in this module
242 use constant COMPRESSION_SHRUNK => 1; # file is Shrunk
243 use constant COMPRESSION_REDUCED_1 => 2; # file is Reduced CF=1
244 use constant COMPRESSION_REDUCED_2 => 3; # file is Reduced CF=2
245 use constant COMPRESSION_REDUCED_3 => 4; # file is Reduced CF=3
246 use constant COMPRESSION_REDUCED_4 => 5; # file is Reduced CF=4
247 use constant COMPRESSION_IMPLODED => 6; # file is Imploded
248 use constant COMPRESSION_TOKENIZED => 7; # reserved for Tokenizing compr.
249 use constant COMPRESSION_DEFLATED_ENHANCED => 9; # reserved for enh. Deflating
250 use constant COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED => 10;
252 # Load the various required classes
253 require Archive::Zip::Archive;
254 require Archive::Zip::Member;
255 require Archive::Zip::FileMember;
256 require Archive::Zip::DirectoryMember;
257 require Archive::Zip::ZipFileMember;
258 require Archive::Zip::NewFileMember;
259 require Archive::Zip::StringMember;
261 use constant ZIPARCHIVECLASS => 'Archive::Zip::Archive';
262 use constant ZIPMEMBERCLASS => 'Archive::Zip::Member';
264 # Convenience functions
# Report whether the first argument is a blessed reference whose class is
# (or inherits from) the class named by the second argument.
# Scalar::Util cannot be relied on here, so the method call is simply
# attempted inside an eval. Always yields a plain boolean and never throws.
sub _ISA ($$) {
    my ( $candidate, $className ) = @_;

    # Calling ->isa on an unblessed reference dies; keep that failure (and
    # any change to $@) from leaking out to the caller.
    local $@;
    my $verdict = eval { ref($candidate) && $candidate->isa($className) };
    return !!$verdict;
}
# Report whether the first argument is a blessed reference that responds to
# the method named by the second argument.
# Always yields a plain boolean and never throws.
sub _CAN ($$) {
    my ( $candidate, $methodName ) = @_;

    # ->can on an unblessed reference dies; trap it and protect the caller's $@.
    local $@;
    my $verdict = eval { ref($candidate) && $candidate->can($methodName) };
    return !!$verdict;
}
281 #####################################################################
282 # Methods
# Constructor facade: asks the invocant for the concrete archive class
# (ZIPARCHIVECLASS) and forwards every remaining argument to that class's
# own new().
sub new {
    my ( $invocant, @ctorArgs ) = @_;
    my $archiveClass = $invocant->ZIPARCHIVECLASS;
    return $archiveClass->new(@ctorArgs);
}
# Compute a CRC-32 with Compress::Raw::Zlib.
#
# Accepted call forms (function or object method):
#   computeCRC32( $string [, $crc] )
#   computeCRC32( { string => $string, checksum => $crc } )
#   $zip->computeCRC32( $string [, $crc] )
#   $zip->computeCRC32( { string => $string, checksum => $crc } )
#
# $crc / checksum is an optional running checksum from a previous call.
# Returns the (updated) CRC-32 value.
sub computeCRC32 {
    my @args = @_;

    # A leading reference that is not a plain hash is the invocant of an
    # object-method call; drop it so both positional and named arguments
    # work after it. (Previously the named form was ignored in that case
    # and the hash reference itself was checksummed.)
    shift @args if ref( $args[0] ) && ref( $args[0] ) ne 'HASH';

    my ( $data, $crc );
    if ( ref( $args[0] ) eq 'HASH' ) {
        $data = $args[0]->{string};
        $crc  = $args[0]->{checksum};
    }
    else {
        ( $data, $crc ) = @args;
    }

    return Compress::Raw::Zlib::crc32( $data, $crc );
}
# Report or change the chunk size used for reading and writing.
# Also sets Zlib's default buffer size (eventually).
#
# Accepted call forms:
#   setChunkSize( $number )
#   setChunkSize( { chunkSize => $number } )
#   $zip->setChunkSize( ... )    # the setting is global all the same
#
# A false or missing size leaves the current value untouched.
# Returns the chunk size that was in effect before the call.
sub setChunkSize {
    my @args = @_;

    # Skip the invocant on object-method calls. Any instance derived from
    # Archive::Zip::Archive qualifies, not only the exact class; otherwise a
    # subclass object would itself be stored as the chunk size.
    shift @args
      if ref( $args[0] )
      && UNIVERSAL::isa( $args[0], 'Archive::Zip::Archive' );

    my $chunkSize =
      ( ref( $args[0] ) eq 'HASH' ) ? $args[0]->{chunkSize} : $args[0];

    my $oldChunkSize = $Archive::Zip::ChunkSize;
    $Archive::Zip::ChunkSize = $chunkSize if $chunkSize;
    return $oldChunkSize;
}
# Accessor for the module-wide chunk size (see setChunkSize).
# Takes no arguments that matter; any invocant is ignored.
sub chunkSize {
    my $currentSize = $Archive::Zip::ChunkSize;
    return $currentSize;
}
# Install the subroutine that receives error strings.
#
# Accepted call forms:
#   setErrorHandler( \&handler )
#   setErrorHandler( { subroutine => \&handler } )
#   $zip->setErrorHandler( ... )    # the setting is global all the same
#
# Passing undef (or nothing) restores the default, \&Carp::carp.
# Returns the handler that was installed before the call.
sub setErrorHandler {
    my @args = @_;

    # Skip the invocant on object-method calls, mirroring setChunkSize();
    # otherwise the archive object itself would be installed as the handler.
    shift @args
      if ref( $args[0] )
      && UNIVERSAL::isa( $args[0], 'Archive::Zip::Archive' );

    my $errorHandler =
      ( ref( $args[0] ) eq 'HASH' ) ? $args[0]->{subroutine} : $args[0];
    $errorHandler = \&Carp::carp unless defined($errorHandler);

    my $oldErrorHandler = $Archive::Zip::ErrorHandler;
    $Archive::Zip::ErrorHandler = $errorHandler;
    return $oldErrorHandler;
}
331 ######################################################################
332 # Private utility functions (not methods).
# Join the arguments with single spaces (a newline is appended as the final
# element) and hand the resulting string to the installed error handler.
#
# $Carp::CarpLevel is raised by 2 for the duration of the call, so that a
# Carp-based handler skips the Archive::Zip helper frames. The raise is done
# with local(), so the previous level is restored even when the handler
# dies; a manual restore would be skipped and the level would stay raised.
#
# Returns the Carp level that was in effect on entry (callers ignore it).
sub _printError {
    my $string       = join( ' ', @_, "\n" );
    my $oldCarpLevel = $Carp::CarpLevel;
    {
        local $Carp::CarpLevel = $oldCarpLevel + 2;
        $Archive::Zip::ErrorHandler->($string);
    }
    return $oldCarpLevel;
}
# Report a format error: the message parts are passed to _printError with a
# 'format error:' prefix. A leading reference (the invocant, when called as
# a method) is discarded first.
# Always returns AZ_FORMAT_ERROR.
sub _formatError {
    my @details = @_;
    shift @details if ref $details[0];
    _printError( 'format error:', @details );
    return AZ_FORMAT_ERROR;
}
# Report an I/O error: the message parts are passed to _printError with an
# 'IO error:' prefix, followed by ':' and the current value of $!.
# A leading reference (the invocant, when called as a method) is discarded.
# Always returns AZ_IO_ERROR.
sub _ioError {
    my @details = @_;
    shift @details if ref $details[0];
    _printError( 'IO error:', @details, ':', $! );
    return AZ_IO_ERROR;
}
# Report a generic error: the message parts are passed to _printError with
# an 'error:' prefix. A leading reference (the invocant, when called as a
# method) is discarded first.
# Always returns AZ_ERROR.
sub _error {
    my @details = @_;
    shift @details if ref $details[0];
    _printError( 'error:', @details );
    return AZ_ERROR;
}
# Placeholder for methods a subclass was expected to override but did not:
# unconditionally croaks, so the failure is reported from the caller's
# point of view.
sub _subclassResponsibility {
    my $complaint = "subclass Responsibility\n";
    Carp::croak($complaint);
}
# Put the given file handle or handle-like object into binary mode.
# Objects that offer a binmode() method are asked to do it themselves;
# everything else goes through the binmode() builtin.
# Returns whatever the chosen binmode call returns.
sub _binmode {
    my ($handle) = @_;
    if ( _CAN( $handle, 'binmode' ) ) {
        return $handle->binmode();
    }
    return binmode($handle);
}
# Guess whether a file handle can be seeked.
# Because of problems with Windows, handles that offer stat() only count as
# seekable when they refer to a real (plain) file.
#
# Decision order:
#   not a reference          -> 0
#   IO::Scalar               -> 0   (brokenly seekable)
#   IO::String               -> 1
#   IO::Seekable             -> 1, except FileHandle objects -> 0
#   anything that can stat() -> result of -f on the handle
#   otherwise                -> 1 only if it can both seek() and tell()
sub _isSeekable {
    my ($handle) = @_;

    return 0 if !ref $handle;
    return 0 if _ISA( $handle, 'IO::Scalar' );
    return 1 if _ISA( $handle, 'IO::String' );

    if ( _ISA( $handle, 'IO::Seekable' ) ) {

        # Unfortunately, some things like FileHandle objects claim to be
        # IO::Seekable but are not actually seekable.
        return _ISA( $handle, 'FileHandle' ) ? 0 : 1;
    }

    return -f $handle if _CAN( $handle, 'stat' );

    return 1 if _CAN( $handle, 'seek' ) && _CAN( $handle, 'tell' );
    return 0;
}
# Write the given data to a file handle via its print() method, with the
# output record separator ($\) neutralised so that a caller's global setting
# cannot add stray bytes to the archive.
# Called as a method: the first argument (the invocant) is ignored.
# Returns whatever the handle's print() returns.
sub _print {
    my $self   = shift;
    my $target = shift;

    local $\ = undef;

    return $target->print(@_);
}
# Return an opened IO::Handle.
#   my ( $status, $fh ) = _newFileHandle( 'fileName', 'w' );
#
# The first argument may be:
#   - a file name: opened through a new IO::File, with the remaining
#     arguments passed on to open();
#   - an IO::Handle object or a GLOB reference: attached to a new IO::File
#     via fdopen(), with the remaining arguments passed on;
#   - an IO::Scalar or IO::String object, or any other reference: handed
#     back unchanged.
#
# $status is the result of open()/fdopen(), or 1 when the argument was
# handed back unchanged.
sub _newFileHandle {
    my ( $source, @openArgs ) = @_;
    my $status = 1;
    my $handle;

    if ( !ref($source) ) {
        $handle = IO::File->new;
        $status = $handle->open( $source, @openArgs );
    }
    elsif ( _ISA( $source, 'IO::Scalar' ) or _ISA( $source, 'IO::String' ) ) {
        $handle = $source;
    }
    elsif ( _ISA( $source, 'IO::Handle' ) or ref($source) eq 'GLOB' ) {
        $handle = IO::File->new;
        $status = $handle->fdopen( $source, @openArgs );
    }
    else {
        $handle = $source;
    }

    return ( $status, $handle );
}
# Read the next signature from the given file handle, leaving the handle
# positioned just after it.
#   ( $status, $signature ) = _readSignature( $fh, $fileName );
#   ( $status, $signature ) = _readSignature( $fh, $fileName, $expected );
#
# With $expected, the signature must equal it; without, it must be one of
# the known PKZIP signatures (central directory file header, local file
# header, end of central directory, data descriptor).
#
# Returns ( AZ_OK, $signature ) on success and the result of _formatError
# plus the signature on a mismatch. When fewer than SIGNATURE_LENGTH bytes
# could be read, only the result of _ioError is returned.
sub _readSignature {
    my ( $fh, $fileName, $expectedSignature ) = @_;

    my $rawBytes;
    my $gotBytes = $fh->read( $rawBytes, SIGNATURE_LENGTH );
    return _ioError("reading header signature")
      if $gotBytes != SIGNATURE_LENGTH;

    my $signature = unpack( SIGNATURE_FORMAT, $rawBytes );

    my $acceptable;
    if ( defined($expectedSignature) ) {
        $acceptable = ( $signature == $expectedSignature );
    }
    else {
        $acceptable = grep { $signature == $_ } (
            CENTRAL_DIRECTORY_FILE_HEADER_SIGNATURE,
            LOCAL_FILE_HEADER_SIGNATURE,
            END_OF_CENTRAL_DIRECTORY_SIGNATURE,
            DATA_DESCRIPTOR_SIGNATURE,
        );
    }
    return ( AZ_OK, $signature ) if $acceptable;

    # Build the complaint; the offset is only known for seekable handles.
    my $errmsg = sprintf( "bad signature: 0x%08x", $signature );
    if ( _isSeekable($fh) ) {
        $errmsg .= sprintf( " at offset %d", $fh->tell() - SIGNATURE_LENGTH );
    }
    my $status = _formatError("$errmsg in file $fileName");

    return ( $status, $signature );
}
# Utility function to make and open a temp file (suffix '.zip').
# The file is NOT removed automatically; the caller deletes it.
# Will NOT create the directory if it doesn't exist.
# Returns file handle and name, or ( undef, undef ) on failure:
#
# my ($fh, $name) = Archive::Zip::tempFile();
# my ($fh, $name) = Archive::Zip::tempFile('mytempdir');
# my ($fh, $name) = Archive::Zip::tempFile( { tempDir => 'mytempdir' } );
#
# NOTE(review): in scalar context the return list yields its last element,
# i.e. the file name rather than the handle.
sub tempFile {
    my $dir = ( ref( $_[0] ) eq 'HASH' ) ? shift->{tempDir} : shift;
    my ( $fh, $filename ) = File::Temp::tempfile(
        SUFFIX => '.zip',
        UNLINK => 0,    # we will delete it!
        $dir ? ( DIR => $dir ) : ()
    );
    return ( undef, undef ) unless $fh;

    # Re-wrap the descriptor in an IO::File opened for read/write. The
    # wrapper holds its own duplicate of the descriptor, so the handle from
    # File::Temp is no longer needed afterwards.
    my ( $status, $newfh ) = _newFileHandle( $fh, 'w+' );
    close($fh);

    # A failed re-open used to be ignored, handing the caller an unopened
    # handle. Report failure instead, and remove the orphaned file since the
    # caller never learns its name.
    unless ($status) {
        unlink($filename);
        return ( undef, undef );
    }

    return ( $newfh, $filename );
}
# Return the normalized directory name as used in a zip file (path
# separators become slashes, etc.).
# Will translate internal slashes in path components (i.e. on Macs) to
# underscores. Discards volume names.
# When $forceDir is set, the whole name is treated as a directory and the
# result carries a trailing slash.
#
# If third argument is a reference, returns volume information there.
#
# Always returns a single string.
#
#   input                  components             output
#   .                      ('.')                  '.'
#   ./a                    ('a')                  a
#   ./a/b                  ('a','b')              a/b
#   ./a/b/                 ('a','b')              a/b
#   a/b/                   ('a','b')              a/b
#   /a/b/                  ('','a','b')           /a/b
#   c:\a\b\c.doc           ('','a','b','c.doc')   /a/b/c.doc         # on Windows
#   "i/o maps:whatever"    ('i_o maps','whatever') "i_o maps/whatever" # on Macs
sub _asZipDirName {
    my ( $name, $forceDir, $volReturn ) = @_;

    my ( $volume, $directories, $file ) =
      File::Spec->splitpath( File::Spec->canonpath($name), $forceDir );
    $$volReturn = $volume if ref($volReturn);

    my @dirs = map { ( my $part = $_ ) =~ s{/}{_}g; $part }
      File::Spec->splitdir($directories);

    # Remove the empty component left behind by a trailing separator.
    # Test for emptiness rather than falseness: a directory literally named
    # "0" is a real component and must be kept.
    pop(@dirs)
      if @dirs && ( !defined( $dirs[-1] ) || $dirs[-1] eq '' );

    push( @dirs, defined($file) ? $file : '' );
    return join( '/', @dirs );
}
# Turn a zip member name (slash-separated) into an absolute name in the
# local file system's format.
# Takes an optional volume name in local FS format (like 'a:'). When no
# volume is given, the result is anchored at the current working directory.
sub _asLocalName {
    my ( $name, $volume ) = @_;    # $name is in zip format
    $volume = '' if !defined($volume);

    # The last slash-separated piece is the file name, the rest directories.
    my @components = split( /\//, $name );
    my $leaf = @components ? pop(@components) : '';

    my $localDirs = '';
    $localDirs = File::Spec->catdir(@components) if @components;

    my $localName = File::Spec->catpath( $volume, $localDirs, $leaf );

    use Cwd;
    return $localName if $volume;
    return File::Spec->catfile( getcwd(), $localName );
}
557 __END__
559 =pod
561 =head1 NAME
563 Archive::Zip - Provide an interface to ZIP archive files.
565 =head1 SYNOPSIS
567 # Create a Zip file
568 use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
569 my $zip = Archive::Zip->new();
571 # Add a directory
572 my $dir_member = $zip->addDirectory( 'dirname/' );
574 # Add a file from a string with compression
575 my $string_member = $zip->addString( 'This is a test', 'stringMember.txt' );
576 $string_member->desiredCompressionMethod( COMPRESSION_DEFLATED );
578 # Add a file from disk
579 my $file_member = $zip->addFile( 'xyz.pl', 'AnotherName.pl' );
581 # Save the Zip file
582 unless ( $zip->writeToFileNamed('someZip.zip') == AZ_OK ) {
583 die 'write error';
584 }
586 # Read a Zip file
587 my $somezip = Archive::Zip->new();
588 unless ( $somezip->read( 'someZip.zip' ) == AZ_OK ) {
589 die 'read error';
590 }
592 # Change the compression type for a file in the Zip
593 my $member = $somezip->memberNamed( 'stringMember.txt' );
594 $member->desiredCompressionMethod( COMPRESSION_STORED );
595 unless ( $somezip->writeToFileNamed( 'someOtherZip.zip' ) == AZ_OK ) {
596 die 'write error';
597 }
599 =head1 DESCRIPTION
601 The Archive::Zip module allows a Perl program to create, manipulate, read,
602 and write Zip archive files.
604 Zip archives can be created, or you can read from existing zip files.
606 Once created, they can be written to files, streams, or strings. Members
607 can be added, removed, extracted, replaced, rearranged, and enumerated.
608 They can also be renamed or have their dates, comments, or other attributes
609 queried or modified. Their data can be compressed or uncompressed as needed.
611 Members can be created from members in existing Zip files, or from existing
612 directories, files, or strings.
614 This module uses the L<Compress::Raw::Zlib> library to read and write the
615 compressed streams inside the files.
617 One can use L<Archive::Zip::MemberRead> to read the zip file archive members
618 as if they were files.
620 =head2 File Naming
622 Regardless of what your local file system uses for file naming, names in a
623 Zip file are in Unix format (I<forward> slashes (/) separating directory
624 names, etc.).
626 C<Archive::Zip> tries to be consistent with file naming conventions, and will
627 translate back and forth between native and Zip file names.
629 However, it can't guess which format names are in. So two rules control what
630 kind of file name you must pass various routines:
632 =over 4
634 =item Names of files are in local format.
636 C<File::Spec> and C<File::Basename> are used for various file
637 operations. When you're referring to a file on your system, use its
638 file naming conventions.
640 =item Names of archive members are in Unix format.
642 This applies to every method that refers to an archive member, or
643 provides a name for new archive members. The C<extract()> methods
644 that can take one or two names will convert from local to zip names
645 if you call them with a single name.
647 =back
649 =head2 Archive::Zip Object Model
651 =head2 Overview
653 Archive::Zip::Archive objects are what you ordinarily deal with.
654 These maintain the structure of a zip file, without necessarily
655 holding data. When a zip is read from a disk file, the (possibly
656 compressed) data still lives in the file, not in memory. Archive
657 members hold information about the individual members, but not
658 (usually) the actual member data. When the zip is written to a
659 (different) file, the member data is compressed or copied as needed.
660 It is possible to make archive members whose data is held in a string
661 in memory, but this is not done when a zip file is read. Directory
662 members don't have any data.
664 =head2 Inheritance
666 Exporter
667 Archive::Zip Common base class, has defs.
668 Archive::Zip::Archive A Zip archive.
669 Archive::Zip::Member Abstract superclass for all members.
670 Archive::Zip::StringMember Member made from a string
671 Archive::Zip::FileMember Member made from an external file
672 Archive::Zip::ZipFileMember Member that lives in a zip file
673 Archive::Zip::NewFileMember Member whose data is in a file
674 Archive::Zip::DirectoryMember Member that is a directory
676 =head1 EXPORTS
678 =over 4
680 =item :CONSTANTS
682 Exports the following constants:
684 FA_MSDOS FA_UNIX GPBF_ENCRYPTED_MASK
685 GPBF_DEFLATING_COMPRESSION_MASK GPBF_HAS_DATA_DESCRIPTOR_MASK
686 COMPRESSION_STORED COMPRESSION_DEFLATED IFA_TEXT_FILE_MASK
687 IFA_TEXT_FILE IFA_BINARY_FILE COMPRESSION_LEVEL_NONE
688 COMPRESSION_LEVEL_DEFAULT COMPRESSION_LEVEL_FASTEST
689 COMPRESSION_LEVEL_BEST_COMPRESSION
691 =item :MISC_CONSTANTS
693 Exports the following constants (only necessary for extending the
694 module):
696 FA_AMIGA FA_VAX_VMS FA_VM_CMS FA_ATARI_ST FA_OS2_HPFS
697 FA_MACINTOSH FA_Z_SYSTEM FA_CPM FA_TOPS20 FA_WINDOWS_NTFS FA_QDOS FA_ACORN FA_VFAT FA_MVS FA_BEOS FA_TANDEM FA_THEOS
698 GPBF_IMPLODING_8K_SLIDING_DICTIONARY_MASK
699 GPBF_IMPLODING_3_SHANNON_FANO_TREES_MASK
700 GPBF_IS_COMPRESSED_PATCHED_DATA_MASK COMPRESSION_SHRUNK
701 DEFLATING_COMPRESSION_NORMAL DEFLATING_COMPRESSION_MAXIMUM
702 DEFLATING_COMPRESSION_FAST DEFLATING_COMPRESSION_SUPER_FAST
703 COMPRESSION_REDUCED_1 COMPRESSION_REDUCED_2 COMPRESSION_REDUCED_3
704 COMPRESSION_REDUCED_4 COMPRESSION_IMPLODED COMPRESSION_TOKENIZED
705 COMPRESSION_DEFLATED_ENHANCED
706 COMPRESSION_PKWARE_DATA_COMPRESSION_LIBRARY_IMPLODED
708 =item :ERROR_CODES
710 Explained below. Returned from most methods.
712 AZ_OK AZ_STREAM_END AZ_ERROR AZ_FORMAT_ERROR AZ_IO_ERROR
714 =back
716 =head1 ERROR CODES
718 Many of the methods in Archive::Zip return error codes. These are implemented
719 as inline subroutines, using the C<use constant> pragma. They can be imported
720 into your namespace using the C<:ERROR_CODES> tag:
722 use Archive::Zip qw( :ERROR_CODES );
726 unless ( $zip->read( 'myfile.zip' ) == AZ_OK ) {
727 die "whoops!";
728 }
730 =over 4
732 =item AZ_OK (0)
734 Everything is fine.
736 =item AZ_STREAM_END (1)
738 The read stream (or central directory) ended normally.
740 =item AZ_ERROR (2)
742 There was some generic kind of error.
744 =item AZ_FORMAT_ERROR (3)
746 There is a format error in a ZIP file being read.
748 =item AZ_IO_ERROR (4)
750 There was an IO error.
752 =back
754 =head2 Compression
756 Archive::Zip allows each member of a ZIP file to be compressed (using the
757 Deflate algorithm) or uncompressed.
759 Other compression algorithms that some versions of ZIP have been able to
760 produce are not supported. Each member has two compression methods: the
761 one it's stored as (this is always COMPRESSION_STORED for string and external
762 file members), and the one you desire for the member in the zip file.
764 These can be different, of course, so you can make a zip member that is not
765 compressed out of one that is, and vice versa.
767 You can inquire about the current compression and set the desired
768 compression method:
770 my $member = $zip->memberNamed( 'xyz.txt' );
771 $member->compressionMethod(); # return current compression
773 # set to read uncompressed
774 $member->desiredCompressionMethod( COMPRESSION_STORED );
776 # set to read compressed
777 $member->desiredCompressionMethod( COMPRESSION_DEFLATED );
779 There are two different compression methods:
781 =over 4
783 =item COMPRESSION_STORED
785 File is stored (no compression)
787 =item COMPRESSION_DEFLATED
789 File is Deflated
791 =back
793 =head2 Compression Levels
795 If a member's desiredCompressionMethod is COMPRESSION_DEFLATED, you
796 can choose different compression levels. This choice may affect the
797 speed of compression and decompression, as well as the size of the
798 compressed member data.
800 $member->desiredCompressionLevel( 9 );
802 The levels given can be:
804 =over 4
806 =item 0 or COMPRESSION_LEVEL_NONE
808 This is the same as saying
810 $member->desiredCompressionMethod( COMPRESSION_STORED );
812 =item 1 .. 9
814 1 gives the best speed and worst compression, and 9 gives the
815 best compression and worst speed.
817 =item COMPRESSION_LEVEL_FASTEST
819 This is a synonym for level 1.
821 =item COMPRESSION_LEVEL_BEST_COMPRESSION
823 This is a synonym for level 9.
825 =item COMPRESSION_LEVEL_DEFAULT
827 This gives a good compromise between speed and compression,
828 and is currently equivalent to 6 (this is in the zlib code).
829 This is the level that will be used if not specified.
831 =back
833 =head1 Archive::Zip Methods
835 The Archive::Zip class (and its invisible subclass Archive::Zip::Archive)
836 implement generic zip file functionality. Creating a new Archive::Zip object
837 actually makes an Archive::Zip::Archive object, but you don't have to worry
838 about this unless you're subclassing.
840 =head2 Constructor
842 =over 4
844 =item new( [$fileName] )
846 Make a new, empty zip archive.
848 my $zip = Archive::Zip->new();
850 If an additional argument is passed, new() will call read()
851 to read the contents of an archive:
853 my $zip = Archive::Zip->new( 'xyz.zip' );
855 If a filename argument is passed and the read fails for any
856 reason, new will return undef. For this reason, it may be
857 better to call read separately.
859 =back
861 =head2 Zip Archive Utility Methods
863 These Archive::Zip methods may be called as functions or as object
864 methods. Do not call them as class methods:
866 $zip = Archive::Zip->new();
867 $crc = Archive::Zip::computeCRC32( 'ghijkl' ); # OK
868 $crc = $zip->computeCRC32( 'ghijkl' ); # also OK
869 $crc = Archive::Zip->computeCRC32( 'ghijkl' ); # NOT OK
871 =over 4
873 =item Archive::Zip::computeCRC32( $string [, $crc] )
875 This is a utility function that uses the Compress::Raw::Zlib CRC
876 routine to compute a CRC-32. You can get the CRC of a string:
878 $crc = Archive::Zip::computeCRC32( $string );
880 Or you can compute the running CRC:
882 $crc = 0;
883 $crc = Archive::Zip::computeCRC32( 'abcdef', $crc );
884 $crc = Archive::Zip::computeCRC32( 'ghijkl', $crc );
886 =item Archive::Zip::setChunkSize( $number )
888 Report or change chunk size used for reading and writing.
889 This can make big differences in dealing with large files.
890 Currently, this defaults to 32K. This also changes the chunk
891 size used for Compress::Raw::Zlib. You must call setChunkSize()
892 before reading or writing. This is not exportable, so you
893 must call it like:
895 Archive::Zip::setChunkSize( 4096 );
897 or as a method on a zip (though this is a global setting).
898 Returns old chunk size.
900 =item Archive::Zip::chunkSize()
902 Returns the current chunk size:
904 my $chunkSize = Archive::Zip::chunkSize();
906 =item Archive::Zip::setErrorHandler( \&subroutine )
908 Change the subroutine called with error strings. This
909 defaults to \&Carp::carp, but you may want to change it to
910 get the error strings. This is not exportable, so you must
911 call it like:
913 Archive::Zip::setErrorHandler( \&myErrorHandler );
915 If myErrorHandler is undef, resets handler to default.
916 Returns old error handler. Note that if you call Carp::carp
917 or a similar routine or if you're chaining to the default
918 error handler from your error handler, you may want to
919 increment the number of caller levels that are skipped (do
920 not just set it to a number):
922 $Carp::CarpLevel++;
924 =item Archive::Zip::tempFile( [$tmpdir] )
926 Create a uniquely named temp file. It will be returned open
927 for read/write. If C<$tmpdir> is given, it is used as the
928 name of a directory to create the file in. If not given,
929 creates the file using C<File::Spec::tmpdir()>. Generally, you can
930 override this choice using the
932 $ENV{TMPDIR}
934 environment variable. But see the L<File::Spec|File::Spec>
935 documentation for your system. Note that on many systems, if you're
936 running in taint mode, then you must make sure that C<$ENV{TMPDIR}> is
937 untainted for it to be used.
938 Will I<NOT> create C<$tmpdir> if it doesn't exist (this is a change
939 from prior versions!). Returns file handle and name:
941 my ($fh, $name) = Archive::Zip::tempFile();
942 my ($fh, $name) = Archive::Zip::tempFile('myTempDir');
943 my ($fh) = Archive::Zip::tempFile(); # if you don't need the name
945 =back
947 =head2 Zip Archive Accessors
949 =over 4
951 =item members()
953 Return a copy of the members array
955 my @members = $zip->members();
957 =item numberOfMembers()
959 Return the number of members I have
961 =item memberNames()
963 Return a list of the (internal) file names of the zip members
965 =item memberNamed( $string )
967 Return ref to member whose filename equals given filename or
968 undef. C<$string> must be in Zip (Unix) filename format.
970 =item membersMatching( $regex )
972 Return array of members whose filenames match given regular
973 expression in list context. Returns number of matching
974 members in scalar context.
976 my @textFileMembers = $zip->membersMatching( '.*\.txt' );
977 # or
978 my $numberOfTextFiles = $zip->membersMatching( '.*\.txt' );
980 =item diskNumber()
982 Return the disk that I start on. Not used for writing zips,
983 but might be interesting if you read a zip in. This should be
984 0, as Archive::Zip does not handle multi-volume archives.
986 =item diskNumberWithStartOfCentralDirectory()
988 Return the disk number that holds the beginning of the
989 central directory. Not used for writing zips, but might be
990 interesting if you read a zip in. This should be 0, as
991 Archive::Zip does not handle multi-volume archives.
993 =item numberOfCentralDirectoriesOnThisDisk()
995 Return the number of CD structures in the zipfile last read in.
996 Not used for writing zips, but might be interesting if you read a zip in.
999 =item numberOfCentralDirectories()
1001 Return the number of CD structures in the zipfile last read in.
1002 Not used for writing zips, but might be interesting if you read a zip in.
1005 =item centralDirectorySize()
1007 Returns central directory size, as read from an external zip
1008 file. Not used for writing zips, but might be interesting if
1009 you read a zip in.
1011 =item centralDirectoryOffsetWRTStartingDiskNumber()
1013 Returns the offset into the zip file where the CD begins. Not
1014 used for writing zips, but might be interesting if you read a
1015 zip in.
1017 =item zipfileComment( [$string] )
1019 Get or set the zipfile comment. Returns the old comment.
1021 print $zip->zipfileComment();
1022 $zip->zipfileComment( 'New Comment' );
1024 =item eocdOffset()
1026 Returns the (unexpected) number of bytes between where the
1027 EOCD was found and where it expected to be. This is normally
1028 0, but would be positive if something (a virus, perhaps) had
1029 added bytes somewhere before the EOCD. Not used for writing
1030 zips, but might be interesting if you read a zip in. Here is
1031 an example of how you can diagnose this:
1033 my $zip = Archive::Zip->new('somefile.zip');
1034 if ($zip->eocdOffset())
1035 {
1036 warn "A virus has added ", $zip->eocdOffset, " bytes of garbage\n";
1037 }
1039 The C<eocdOffset()> is used to adjust the starting position of member
1040 headers, if necessary.
1042 =item fileName()
1044 Returns the name of the file last read from. If nothing has
1045 been read yet, returns an empty string; if read from a file
1046 handle, returns the handle in string form.
1048 =back
1050 =head2 Zip Archive Member Operations
1052 Various operations on a zip file modify members. When a member is
1053 passed as an argument, you can either use a reference to the member
1054 itself, or the name of a member. Of course, using the name requires
1055 that names be unique within a zip (this is not enforced).
1057 =over 4
1059 =item removeMember( $memberOrName )
1061 Remove and return the given member, or match its name and
1062 remove it. Returns undef if member or name doesn't exist in this
1063 Zip. No-op if member does not belong to this zip.
1065 =item replaceMember( $memberOrName, $newMember )
1067 Remove and return the given member, or match its name and
1068 remove it. Replace with new member. Returns undef if member or
1069 name doesn't exist in this Zip, or if C<$newMember> is undefined.
1071 It is an (undiagnosed) error to provide a C<$newMember> that is a
1072 member of the zip being modified.
1074 my $member1 = $zip->removeMember( 'xyz' );
1075 my $member2 = $zip->replaceMember( 'abc', $member1 );
1076 # now, $member2 (named 'abc') is not in $zip,
1077 # and $member1 (named 'xyz') is, having taken $member2's place.
1079 =item extractMember( $memberOrName [, $extractedName ] )
1081 Extract the given member, or match its name and extract it.
1082 Returns undef if member doesn't exist in this Zip. If
1083 optional second arg is given, use it as the name of the
1084 extracted member. Otherwise, the internal filename of the
1085 member is used as the name of the extracted file or
1086 directory.
1087 If you pass C<$extractedName>, it should be in the local file
1088 system's format.
1089 All necessary directories will be created. Returns C<AZ_OK>
1090 on success.
1092 =item extractMemberWithoutPaths( $memberOrName [, $extractedName ] )
1094 Extract the given member, or match its name and extract it.
1095 Does not use path information (extracts into the current
1096 directory). Returns undef if member doesn't exist in this
1097 Zip.
1098 If optional second arg is given, use it as the name of the
1099 extracted member (its paths will be deleted too). Otherwise,
1100 the internal filename of the member (minus paths) is used as
1101 the name of the extracted file or directory. Returns C<AZ_OK>
1102 on success.
1104 =item addMember( $member )
1106 Append a member (possibly from another zip file) to the zip
1107 file. Returns the new member. Generally, you will use
1108 addFile(), addDirectory(), addFileOrDirectory(), addString(),
1109 or read() to add members.
1111 # Move member named 'abc' to end of zip:
1112 my $member = $zip->removeMember( 'abc' );
1113 $zip->addMember( $member );
1115 =item updateMember( $memberOrName, $fileName )
1117 Update a single member from the file or directory named C<$fileName>.
1118 Returns the (possibly added or updated) member, if any; C<undef> on
1119 errors.
1120 The comparison is based on C<lastModTime()> and (in the case of a
1121 non-directory) the size of the file.
1123 =item addFile( $fileName [, $newName ] )
1125 Append a member whose data comes from an external file,
1126 returning the member or undef. The member will have its file
1127 name set to the name of the external file, and its
1128 desiredCompressionMethod set to COMPRESSION_DEFLATED. The
1129 file attributes and last modification time will be set from
1130 the file.
1131 If the name given does not represent a readable plain file or
1132 symbolic link, undef will be returned. C<$fileName> must be
1133 in the format required for the local file system.
1134 The optional C<$newName> argument sets the internal file name
1135 to something different than the given $fileName. C<$newName>,
1136 if given, must be in Zip name format (i.e. Unix).
1137 The text mode bit will be set if the contents appears to be
1138 text (as returned by the C<-T> perl operator).
1141 I<NOTE> that you shouldn't (generally) use absolute path names
1142 in zip member names, as this will cause problems with some zip
1143 tools as well as introduce a security hole and make the zip
1144 harder to use.
1146 =item addDirectory( $directoryName [, $fileName ] )
1150 Append a member created from the given directory name. The
1151 directory name does not have to name an existing directory.
1152 If the named directory exists, the file modification time and
1153 permissions are set from the existing directory, otherwise
1154 they are set to now and permissive default permissions.
1155 C<$directoryName> must be in local file system format.
1156 The optional second argument sets the name of the archive
1157 member (which defaults to C<$directoryName>). If given, it
1158 must be in Zip (Unix) format.
1159 Returns the new member.
1161 =item addFileOrDirectory( $name [, $newName ] )
1165 Append a member from the file or directory named $name. If
1166 $newName is given, use it for the name of the new member.
1167 Will add or remove trailing slashes from $newName as needed.
1168 C<$name> must be in local file system format.
1169 The optional second argument sets the name of the archive
1170 member (which defaults to C<$name>). If given, it must be in
1171 Zip (Unix) format.
1173 =item addString( $stringOrStringRef, $name )
1177 Append a member created from the given string or string
1178 reference. The name is given by the second argument.
1179 Returns the new member. The last modification time will be
1180 set to now, and the file attributes will be set to permissive
1181 defaults.
1183 my $member = $zip->addString( 'This is a test', 'test.txt' );
1185 =item contents( $memberOrMemberName [, $newContents ] )
1189 Returns the uncompressed data for a particular member, or
1190 undef.
1192 print "xyz.txt contains " . $zip->contents( 'xyz.txt' );
1194 Also can change the contents of a member:
1196 $zip->contents( 'xyz.txt', 'This is the new contents' );
1198 If called expecting an array as the return value, it will include
1199 the status as the second value in the array.
1201 ($content, $status) = $zip->contents( 'xyz.txt');
1203 =back
1205 =head2 Zip Archive I/O operations
1208 A Zip archive can be written to a file or file handle, or read from
1209 one.
1211 =over 4
1213 =item writeToFileNamed( $fileName )
1217 Write a zip archive to named file. Returns C<AZ_OK> on
1218 success.
1220 my $status = $zip->writeToFileNamed( 'xx.zip' );
1221 die "error somewhere" if $status != AZ_OK;
1223 Note that if you use the same name as an existing zip file
1224 that you read in, you will clobber ZipFileMembers. So
1225 instead, write to a different file name, then delete the
1226 original.
1227 If you use the C<overwrite()> or C<overwriteAs()> methods, you can
1228 re-write the original zip in this way.
1229 C<$fileName> should be a valid file name on your system.
1231 =item writeToFileHandle( $fileHandle [, $seekable] )
1233 Write a zip archive to a file handle. Return AZ_OK on
1234 success. The optional second arg tells whether or not to try
1235 to seek backwards to re-write headers. If not provided, it is
1236 set if the Perl C<-f> test returns true. This could fail on
1237 some operating systems, though.
1239 my $fh = IO::File->new( 'someFile.zip', 'w' );
1240 unless ( $zip->writeToFileHandle( $fh ) == AZ_OK ) {
1241 # error handling
1244 If you pass a file handle that is not seekable (like if
1245 you're writing to a pipe or a socket), pass a false second
1246 argument:
1248 my $fh = IO::File->new( '| cat > somefile.zip', 'w' );
1249 $zip->writeToFileHandle( $fh, 0 ); # fh is not seekable
1251 If this method fails during the write of a member, that
1252 member and all following it will return false from
1253 C<wasWritten()>. See writeCentralDirectory() for a way to
1254 deal with this.
1255 If you want, you can write data to the file handle before
1256 passing it to writeToFileHandle(); this could be used (for
1257 instance) for making self-extracting archives. However, this
1258 only works reliably when writing to a real file (as opposed
1259 to STDOUT or some other possible non-file).
1261 See examples/selfex.pl for how to write a self-extracting
1262 archive.
1264 =item writeCentralDirectory( $fileHandle [, $offset ] )
1266 Writes the central directory structure to the given file
1267 handle.
1269 Returns AZ_OK on success. If given an $offset, will
1270 seek to that point before writing. This can be used for
1271 recovery in cases where writeToFileHandle or writeToFileNamed
1272 returns an IO error because of running out of space on the
1273 destination file.
1275 You can truncate the zip by seeking backwards and then writing the
1276 directory:
1278 my $fh = IO::File->new( 'someFile.zip', 'w' );
1279 my $retval = $zip->writeToFileHandle( $fh );
1280 if ( $retval == AZ_IO_ERROR ) {
1281 my @unwritten = grep { not $_->wasWritten() } $zip->members();
1282 if (@unwritten) {
1283 $zip->removeMember( $_ ) foreach @unwritten;
1284 $zip->writeCentralDirectory( $fh,
1285 $unwritten[0]->writeLocalHeaderRelativeOffset());
1289 =item overwriteAs( $newName )
1291 Write the zip to the specified file, as safely as possible.
1292 This is done by first writing to a temp file, then renaming
1293 the original if it exists, then renaming the temp file, then
1294 deleting the renamed original if it exists. Returns AZ_OK if
1295 successful.
1297 =item overwrite()
1299 Write back to the original zip file. See overwriteAs() above.
1300 If the zip was not ever read from a file, this generates an
1301 error.
1303 =item read( $fileName )
1305 Read zipfile headers from a zip file, appending new members.
1306 Returns C<AZ_OK> or error code.
1308 my $zipFile = Archive::Zip->new();
1309 my $status = $zipFile->read( '/some/FileName.zip' );
1311 =item readFromFileHandle( $fileHandle, $filename )
1313 Read zipfile headers from an already-opened file handle,
1314 appending new members. Does not close the file handle.
1315 Returns C<AZ_OK> or error code. Note that this requires a
1316 seekable file handle; reading from a stream is not yet
1317 supported.
1319 my $fh = IO::File->new( '/some/FileName.zip', 'r' );
1320 my $zip1 = Archive::Zip->new();
1321 my $status = $zip1->readFromFileHandle( $fh );
1322 my $zip2 = Archive::Zip->new();
1323 $status = $zip2->readFromFileHandle( $fh );
1325 =back
1327 =head2 Zip Archive Tree operations
1329 These used to be in Archive::Zip::Tree but got moved into
1330 Archive::Zip. They enable operation on an entire tree of members or
1331 files.
1332 A usage example:
1334 use Archive::Zip;
1335 my $zip = Archive::Zip->new();
1337 # add all readable files and directories below . as xyz/*
1338 $zip->addTree( '.', 'xyz' );
1340 # add all readable plain files below /abc as def/*
1341 $zip->addTree( '/abc', 'def', sub { -f && -r } );
1343 # add all .c files below /tmp as stuff/*
1344 $zip->addTreeMatching( '/tmp', 'stuff', '\.c$' );
1346 # add all .o files below /tmp as stuff/* if they aren't writable
1347 $zip->addTreeMatching( '/tmp', 'stuff', '\.o$', sub { ! -w } );
1349 # add all .so files below /tmp that are smaller than 200 bytes as stuff/*
1350 $zip->addTreeMatching( '/tmp', 'stuff', '\.so$', sub { -s $_ < 200 } );
1352 # and write them into a file
1353 $zip->writeToFileNamed('xxx.zip');
1355 # now extract the same files into /tmpx
1356 $zip->extractTree( 'stuff', '/tmpx' );
1358 =over 4
1360 =item $zip->addTree( $root, $dest [,$pred] ) -- Add tree of files to a zip
1362 C<$root> is the root of the tree of files and directories to be
1363 added. It is a valid directory name on your system. C<$dest> is
1364 the name for the root in the zip file (undef or blank means
1365 to use relative pathnames). It is a valid ZIP directory name
1366 (that is, it uses forward slashes (/) for separating
1367 directory components). C<$pred> is an optional subroutine
1368 reference to select files: it is passed the name of the
1369 prospective file or directory using C<$_>, and if it returns
1370 true, the file or directory will be included. The default is
1371 to add all readable files and directories. For instance,
1372 using
1374 my $pred = sub { /\.txt/ };
1375 $zip->addTree( '.', '', $pred );
1377 will add all the .txt files in and below the current
1378 directory, using relative names, and making the names
1379 identical in the zipfile:
1381 original name zip member name
1382 ./xyz xyz
1383 ./a/ a/
1384 ./a/b a/b
1386 To translate absolute to relative pathnames, just pass them
1387 in: $zip->addTree( '/c/d', 'a' );
1389 original name zip member name
1390 /c/d/xyz a/xyz
1391 /c/d/a/ a/a/
1392 /c/d/a/b a/a/b
1394 Returns AZ_OK on success. Note that this will not follow
1395 symbolic links to directories. Note also that this does not
1396 check for the validity of filenames.
1398 Note that you generally I<don't> want to make zip archive member names
1399 absolute.
1401 =item $zip->addTreeMatching( $root, $dest, $pattern [,$pred] )
1403 $root is the root of the tree of files and directories to be
1404 added. $dest is the name for the root in the zip file (undef
1405 means to use relative pathnames). $pattern is a (non-anchored)
1406 regular expression for filenames to match. $pred is an
1407 optional subroutine reference to select files: it is passed
1408 the name of the prospective file or directory in C<$_>, and
1409 if it returns true, the file or directory will be included.
1410 The default is to add all readable files and directories. To
1411 add all files in and below the current directory whose names
1412 end in C<.pl>, and make them extract into a subdirectory
1413 named C<xyz>, do this:
1415 $zip->addTreeMatching( '.', 'xyz', '\.pl$' )
1417 To add all I<writable> files in and below the directory named
1418 C</abc> whose names end in C<.pl>, and make them extract into
1419 a subdirectory named C<xyz>, do this:
1421 $zip->addTreeMatching( '/abc', 'xyz', '\.pl$', sub { -w } )
1423 Returns AZ_OK on success. Note that this will not follow
1424 symbolic links to directories.
1426 =item $zip->updateTree( $root, [ $dest, [ $pred [, $mirror]]] );
1430 Update a zip file from a directory tree.
1432 C<updateTree()> takes the same arguments as C<addTree()>, but first
1433 checks to see whether the file or directory already exists in the zip
1434 file, and whether it has been changed.
1436 If the fourth argument C<$mirror> is true, then delete all my members
1437 if corresponding files weren't found.
1440 Returns an error code or AZ_OK if all is well.
1442 =item $zip->extractTree()
1446 =item $zip->extractTree( $root )
1450 =item $zip->extractTree( $root, $dest )
1454 =item $zip->extractTree( $root, $dest, $volume )
1458 If you don't give any arguments at all, will extract all the
1459 files in the zip with their original names.
1462 If you supply one argument for C<$root>, C<extractTree> will extract
1463 all the members whose names start with C<$root> into the current
1464 directory, stripping off C<$root> first.
1465 C<$root> is in Zip (Unix) format.
1466 For instance,
1468 $zip->extractTree( 'a' );
1470 when applied to a zip containing the files:
1471 a/x a/b/c ax/d/e d/e will extract:
1474 a/x as ./x
1477 a/b/c as ./b/c
1480 If you give two arguments, C<extractTree> extracts all the members
1481 whose names start with C<$root>. It will translate C<$root> into
1482 C<$dest> to construct the destination file name.
1483 C<$root> and C<$dest> are in Zip (Unix) format.
1484 For instance,
1486 $zip->extractTree( 'a', 'd/e' );
1488 when applied to a zip containing the files:
1489 a/x a/b/c ax/d/e d/e will extract:
1492 a/x to d/e/x
1495 a/b/c to d/e/b/c and ignore ax/d/e and d/e
1498 If you give three arguments, C<extractTree> extracts all the members
1499 whose names start with C<$root>. It will translate C<$root> into
1500 C<$dest> to construct the destination file name, and then it will
1501 convert to local file system format, using C<$volume> as the name of
1502 the destination volume.
1505 C<$root> and C<$dest> are in Zip (Unix) format.
1508 C<$volume> is in local file system format.
1511 For instance, under Windows,
1513 $zip->extractTree( 'a', 'd/e', 'f:' );
1515 when applied to a zip containing the files:
1516 a/x a/b/c ax/d/e d/e will extract:
1519 a/x to f:d/e/x
1522 a/b/c to f:d/e/b/c and ignore ax/d/e and d/e
1525 If you want absolute paths (the prior example used paths relative to
1526 the current directory on the destination volume), you can specify these
1527 in C<$dest>:
1529 $zip->extractTree( 'a', '/d/e', 'f:' );
1531 when applied to a zip containing the files:
1532 a/x a/b/c ax/d/e d/e will extract:
1535 a/x to f:\d\e\x
1538 a/b/c to f:\d\e\b\c and ignore ax/d/e and d/e
1540 Returns an error code or AZ_OK if everything worked OK.
1542 =back
1544 =head1 MEMBER OPERATIONS
1547 =head2 Member Class Methods
1550 Several constructors allow you to construct members without adding
1551 them to a zip archive. These work the same as the addFile(),
1552 addDirectory(), and addString() zip instance methods described above,
1553 but they don't add the new members to a zip.
1555 =over 4
1557 =item Archive::Zip::Member->newFromString( $stringOrStringRef [, $fileName] )
1561 Construct a new member from the given string. Returns undef
1562 on error.
1564 my $member = Archive::Zip::Member->newFromString( 'This is a test',
1565 'xyz.txt' );
1567 =item newFromFile( $fileName )
1571 Construct a new member from the given file. Returns undef on
1572 error.
1574 my $member = Archive::Zip::Member->newFromFile( 'xyz.txt' );
1576 =item newDirectoryNamed( $directoryName [, $zipname ] )
1580 Construct a new member from the given directory.
1581 C<$directoryName> must be a valid name on your file system; it doesn't
1582 have to exist.
1585 If given, C<$zipname> will be the name of the zip member; it must be a
1586 valid Zip (Unix) name. If not given, it will be converted from
1587 C<$directoryName>.
1590 Returns undef on error.
1592 my $member = Archive::Zip::Member->newDirectoryNamed( 'CVS/' );
1594 =back
1596 =head2 Member Simple accessors
1599 These methods get (and/or set) member attribute values.
1601 =over 4
1603 =item versionMadeBy()
1607 Gets the field from the member header.
1609 =item fileAttributeFormat( [$format] )
1613 Gets or sets the field from the member header. These are
1614 C<FA_*> values.
1616 =item versionNeededToExtract()
1620 Gets the field from the member header.
1622 =item bitFlag()
1626 Gets the general purpose bit field from the member header.
1627 This is where the C<GPBF_*> bits live.
1629 =item compressionMethod()
1633 Returns the member compression method. This is the method
1634 that is currently being used to compress the member data.
1635 This will be COMPRESSION_STORED for added string or file
1636 members, or any of the C<COMPRESSION_*> values for members
1637 from a zip file. However, this module can only handle members
1638 whose data is in COMPRESSION_STORED or COMPRESSION_DEFLATED
1639 format.
1641 =item desiredCompressionMethod( [$method] )
1645 Get or set the member's C<desiredCompressionMethod>. This is
1646 the compression method that will be used when the member is
1647 written. Returns prior desiredCompressionMethod. Only
1648 COMPRESSION_DEFLATED or COMPRESSION_STORED are valid
1649 arguments. Changing to COMPRESSION_STORED will change the
1650 member desiredCompressionLevel to 0; changing to
1651 COMPRESSION_DEFLATED will change the member
1652 desiredCompressionLevel to COMPRESSION_LEVEL_DEFAULT.
1654 =item desiredCompressionLevel( [$level] )
1658 Get or set the member's desiredCompressionLevel. This is the
1659 compression level that will be used to write. Returns prior
1660 desiredCompressionLevel. Valid arguments are 0 through 9,
1661 COMPRESSION_LEVEL_NONE, COMPRESSION_LEVEL_DEFAULT,
1662 COMPRESSION_LEVEL_BEST_COMPRESSION, and
1663 COMPRESSION_LEVEL_FASTEST. 0 or COMPRESSION_LEVEL_NONE will
1664 change the desiredCompressionMethod to COMPRESSION_STORED.
1665 All other arguments will change the desiredCompressionMethod
1666 to COMPRESSION_DEFLATED.
1668 =item externalFileName()
1672 Return the member's external file name, if any, or undef.
1674 =item fileName()
1678 Get or set the member's internal filename. Returns the
1679 (possibly new) filename. Names will have backslashes
1680 converted to forward slashes, and will have multiple
1681 consecutive slashes converted to single ones.
1683 =item lastModFileDateTime()
1687 Return the member's last modification date/time stamp in
1688 MS-DOS format.
1690 =item lastModTime()
1694 Return the member's last modification date/time stamp,
1695 converted to unix localtime format.
1697 print "Mod Time: " . scalar( localtime( $member->lastModTime() ) );
1699 =item setLastModFileDateTimeFromUnix()
1701 Set the member's lastModFileDateTime from the given unix
1702 time.
1704 $member->setLastModFileDateTimeFromUnix( time() );
1706 =item internalFileAttributes()
1708 Return the internal file attributes field from the zip
1709 header. This is only set for members read from a zip file.
1711 =item externalFileAttributes()
1713 Return member attributes as read from the ZIP file. Note that
1714 these are NOT UNIX!
1716 =item unixFileAttributes( [$newAttributes] )
1718 Get or set the member's file attributes using UNIX file
1719 attributes. Returns old attributes.
1721 my $oldAttribs = $member->unixFileAttributes( 0666 );
1723 Note that the return value has more than just the file
1724 permissions, so you will have to mask off the lowest bits for
1725 comparisons.
1727 =item localExtraField( [$newField] )
1729 Gets or sets the extra field that was read from the local
1730 header. This is not set for a member from a zip file until
1731 after the member has been written out. The extra field must
1732 be in the proper format.
1734 =item cdExtraField( [$newField] )
1736 Gets or sets the extra field that was read from the central
1737 directory header. The extra field must be in the proper
1738 format.
1740 =item extraFields()
1742 Return both local and CD extra fields, concatenated.
1744 =item fileComment( [$newComment] )
1746 Get or set the member's file comment.
1748 =item hasDataDescriptor()
1750 Get or set the data descriptor flag. If this is set, the
1751 local header will not necessarily have the correct data
1752 sizes. Instead, a small structure will be stored at the end
1753 of the member data with these values. This should be
1754 transparent in normal operation.
1756 =item crc32()
1758 Return the CRC-32 value for this member. This will not be set
1759 for members that were constructed from strings or external
1760 files until after the member has been written.
1762 =item crc32String()
1764 Return the CRC-32 value for this member as an 8 character
1765 printable hex string. This will not be set for members that
1766 were constructed from strings or external files until after
1767 the member has been written.
1769 =item compressedSize()
1771 Return the compressed size for this member. This will not be
1772 set for members that were constructed from strings or
1773 external files until after the member has been written.
1775 =item uncompressedSize()
1777 Return the uncompressed size for this member.
1779 =item isEncrypted()
1781 Return true if this member is encrypted. The Archive::Zip
1782 module does not currently create or extract encrypted
1783 members.
1785 =item isTextFile( [$flag] )
1787 Returns true if I am a text file. Also can set the status if
1788 given an argument (then returns old state). Note that this
1789 module does not currently do anything with this flag upon
1790 extraction or storage. That is, bytes are stored in native
1791 format whether or not they came from a text file.
1793 =item isBinaryFile()
1795 Returns true if I am a binary file. Also can set the status
1796 if given an argument (then returns old state). Note that this
1797 module does not currently do anything with this flag upon
1798 extraction or storage. That is, bytes are stored in native
1799 format whether or not they came from a text file.
1801 =item extractToFileNamed( $fileName )
1803 Extract me to a file with the given name. The file will be
1804 created with default modes. Directories will be created as
1805 needed.
1806 The C<$fileName> argument should be a valid file name on your
1807 file system.
1808 Returns AZ_OK on success.
1810 =item isDirectory()
1812 Returns true if I am a directory.
1814 =item writeLocalHeaderRelativeOffset()
1816 Returns the file offset in bytes the last time I was written.
1818 =item wasWritten()
1820 Returns true if I was successfully written. Reset at the
1821 beginning of a write attempt.
1823 =back
1825 =head2 Low-level member data reading
1827 It is possible to use lower-level routines to access member data
1828 streams, rather than the extract* methods and contents(). For
1829 instance, here is how to print the uncompressed contents of a member
1830 in chunks using these methods:
1832 my ( $member, $status, $bufferRef );
1833 $member = $zip->memberNamed( 'xyz.txt' );
1834 $member->desiredCompressionMethod( COMPRESSION_STORED );
1835 $status = $member->rewindData();
1836 die "error $status" unless $status == AZ_OK;
1837 while ( ! $member->readIsDone() )
1839 ( $bufferRef, $status ) = $member->readChunk();
1840 die "error $status"
1841 if $status != AZ_OK && $status != AZ_STREAM_END;
1842 # do something with $bufferRef:
1843 print $$bufferRef;
1845 $member->endRead();
1847 =over 4
1849 =item readChunk( [$chunkSize] )
1851 This reads the next chunk of given size from the member's
1852 data stream and compresses or uncompresses it as necessary,
1853 returning a reference to the bytes read and a status. If size
1854 argument is not given, defaults to global set by
1855 Archive::Zip::setChunkSize. Status is AZ_OK on success until
1856 the last chunk, where it returns AZ_STREAM_END. Returns C<(
1857 \$bytes, $status)>.
1859 my ( $outRef, $status ) = $self->readChunk();
1860 print $$outRef if $status == AZ_OK || $status == AZ_STREAM_END;
1862 =item rewindData()
1864 Rewind data and set up for reading data streams or writing
1865 zip files. Can take options for C<inflateInit()> or
1866 C<deflateInit()>, but this isn't likely to be necessary.
1867 Subclass overrides should call this method. Returns C<AZ_OK>
1868 on success.
1870 =item endRead()
1872 Reset the read variables and free the inflater or deflater.
1873 Must be called to close files, etc. Returns AZ_OK on success.
1875 =item readIsDone()
1877 Return true if the read has run out of data or errored out.
1879 =item contents()
1881 Return the entire uncompressed member data or undef in scalar
1882 context. When called in array context, returns C<( $string,
1883 $status )>; status will be AZ_OK on success:
1885 my $string = $member->contents();
1886 # or
1887 my ( $string, $status ) = $member->contents();
1888 die "error $status" unless $status == AZ_OK;
1890 Can also be used to set the contents of a member (this may
1891 change the class of the member):
1893 $member->contents( "this is my new contents" );
1895 =item extractToFileHandle( $fh )
1897 Extract (and uncompress, if necessary) the member's contents
1898 to the given file handle. Return AZ_OK on success.
1900 =back
1902 =head1 Archive::Zip::FileMember methods
1904 The Archive::Zip::FileMember class extends Archive::Zip::Member. It is the
1905 base class for both ZipFileMember and NewFileMember classes. This class adds
1906 an C<externalFileName> and an C<fh> member to keep track of the external
1907 file.
1909 =over 4
1911 =item externalFileName()
1913 Return the member's external filename.
1915 =item fh()
1917 Return the member's read file handle. Automatically opens file if
1918 necessary.
1920 =back
1922 =head1 Archive::Zip::ZipFileMember methods
1924 The Archive::Zip::ZipFileMember class represents members that have been read
1925 from external zip files.
1927 =over 4
1929 =item diskNumberStart()
1931 Returns the disk number that the member's local header resides in.
1932 Should be 0.
1934 =item localHeaderRelativeOffset()
1936 Returns the offset into the zip file where the member's local header is located.
1939 =item dataOffset()
1941 Returns the offset from the beginning of the zip file to the member's
1942 data.
1944 =back
1946 =head1 REQUIRED MODULES
1948 L<Archive::Zip> requires several other modules:
1950 L<Carp>
1952 L<Compress::Raw::Zlib>
1954 L<Cwd>
1956 L<File::Basename>
1958 L<File::Copy>
1960 L<File::Find>
1962 L<File::Path>
1964 L<File::Spec>
1966 L<IO::File>
1968 L<IO::Seekable>
1970 L<Time::Local>
1972 =head1 BUGS AND CAVEATS
1974 =head2 When not to use Archive::Zip
1976 If you are just going to be extracting zips (and/or other archives) you
1977 are recommended to look at using L<Archive::Extract> instead, as it is much
1978 easier to use and factors out archive-specific functionality.
1980 =head2 Try to avoid IO::Scalar
1982 One of the most common ways to use Archive::Zip is to generate Zip files
1983 in-memory. Most people have used L<IO::Scalar> for this purpose.
1985 Unfortunately, as of 1.11 this module no longer works with L<IO::Scalar>
1986 as it incorrectly implements seeking.
1988 Anybody using L<IO::Scalar> should consider porting to L<IO::String>,
1989 which is smaller, lighter, and is implemented to be perfectly compatible
1990 with regular seekable filehandles.
1992 Support for L<IO::Scalar> most likely will B<not> be restored in the
1993 future, as L<IO::Scalar> itself cannot change the way it is implemented
1994 due to back-compatibility issues.
1996 =head1 TO DO
1998 * auto-choosing storing vs compression
2000 * extra field hooks (see notes.txt)
2002 * check for dups on addition/renaming?
2004 * Text file extraction (line end translation)
2006 * Reading zip files from non-seekable inputs
2007 (Perhaps by proxying through IO::String?)
2009 * separate unused constants into separate module
2011 * cookbook style docs
2013 * Handle tainted paths correctly
2015 * Work on better compatibility with other IO:: modules
2017 =head1 SUPPORT
2019 Bugs should be reported via the CPAN bug tracker
2021 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Archive-Zip>
2023 For other issues contact the maintainer
2025 =head1 AUTHOR
2027 Adam Kennedy E<lt>adamk@cpan.orgE<gt>
2029 Previously maintained by Steve Peters E<lt>steve@fisharerojo.orgE<gt>.
2031 File attributes code by Maurice Aubrey E<lt>maurice@lovelyfilth.comE<gt>.
2033 Originally by Ned Konz E<lt>nedkonz@cpan.orgE<gt>.
2035 =head1 COPYRIGHT
2037 Some parts copyright 2006 - 2009 Adam Kennedy.
2039 Some parts copyright 2005 Steve Peters.
2041 Original work copyright 2000 - 2004 Ned Konz.
2043 This program is free software; you can redistribute it and/or modify
2044 it under the same terms as Perl itself.
2046 =head1 SEE ALSO
2048 Look at L<Archive::Zip::MemberRead> which is a wrapper that allows one to
2049 read Zip archive members as if they were files.
2051 L<Compress::Raw::Zlib>, L<Archive::Tar>, L<Archive::Extract>
2053 There is a Japanese translation of this
2054 document at L<http://www.memb.jp/~deq/perl/doc-ja/Archive-Zip.html>
2055 that was done by DEQ E<lt>deq@oct.zaq.ne.jpE<gt> . Thanks!
2057 =cut