// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * Implementation of zip file archive.
 *
 * @copyright 2008 Petr Skoda (http://skodak.org)
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
// Stop immediately unless the including script has defined MOODLE_INTERNAL.
defined('MOODLE_INTERNAL') || die();

// Base class that zip_archive extends.
require_once("$CFG->libdir/filestorage/file_archive.php");
/**
 * Zip file archive class.
 *
 * @copyright 2008 Petr Skoda (http://skodak.org)
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class zip_archive extends file_archive {

    /** @var string Pathname of archive */
    protected $archivepathname = null;

    /** @var int archive open mode */
    protected $mode = null;

    /** @var int Used memory tracking */
    protected $usedmem = 0;

    /**
     * NOTE(review): declaration restored; only the doc comment survived in the reviewed
     * listing, while $this->pos is used by the iterator methods.
     *
     * @var int Iteration position
     */
    protected $pos = 0;

    /**
     * NOTE(review): declaration restored; only the doc comment survived in the reviewed
     * listing, while $this->za is used by nearly every method.
     *
     * @var ZipArchive instance
     */
    protected $za;

    /** @var bool was this archive modified? */
    protected $modified = false;

    /** @var array unicode decoding array, created by decoding zip file */
    protected $namelookup = null;

    /** @var string base64 encoded contents of empty zip file */
    protected static $emptyzipcontent = 'UEsFBgAAAAAAAAAAAAAAAAAAAAAAAA==';

    /** @var bool ugly hack for broken empty zip handling in < PHP 5.3.10 */
    protected $emptyziphack = false;
/**
 * Create new zip_archive instance.
 */
public function __construct() {
    // Autodetects encoding by default.
    $this->encoding = null;
}
/**
 * Open or create archive (depending on $mode).
 *
 * On failure a developer-level debugging message describing the ZipArchive error is
 * emitted and the instance is left without an open archive.
 *
 * NOTE(review): the initial state reset (close(), usedmem, pos, mode), the switch heads
 * and the failure cleanup of $this->za were not visible in the reviewed listing; they are
 * restored from how the remaining methods use these properties - confirm against upstream.
 *
 * @todo MDL-31048 return error message
 * @param string $archivepathname
 * @param int $mode OPEN, CREATE or OVERWRITE constant
 * @param string $encoding archive local paths encoding, empty means autodetect
 * @return bool success
 */
public function open($archivepathname, $mode=file_archive::CREATE, $encoding=null) {
    // Release whatever archive this instance may still hold.
    $this->close();

    $this->usedmem  = 0;
    $this->pos      = 0;
    $this->encoding = $encoding;
    $this->mode     = $mode;

    $this->za = new ZipArchive();

    switch ($mode) {
        case file_archive::OPEN:
            $flags = 0;
            break;
        case file_archive::OVERWRITE:
            // Changed in PHP 5.2.8.
            $flags = ZIPARCHIVE::CREATE | ZIPARCHIVE::OVERWRITE;
            break;
        case file_archive::CREATE:
        default:
            $flags = ZIPARCHIVE::CREATE;
            break;
    }

    $result = $this->za->open($archivepathname, $flags);

    if ($flags == 0 && $result === ZIPARCHIVE::ER_NOZIP && filesize($archivepathname) === 22) {
        // Legacy PHP versions < 5.3.10 can not deal with empty zip archives.
        if (file_get_contents($archivepathname) === base64_decode(self::$emptyzipcontent)) {
            if ($temp = make_temp_directory('zip')) {
                // Work on a scratch archive instead; close() removes it again.
                $this->emptyziphack = tempnam($temp, 'zip');
                $this->za = new ZipArchive();
                $result = $this->za->open($this->emptyziphack, ZIPARCHIVE::CREATE);
            }
        }
    }

    if ($result === true) {
        // Remember the canonical path when the file exists so it can be compared with realpath() later.
        if (file_exists($archivepathname)) {
            $this->archivepathname = realpath($archivepathname);
        } else {
            $this->archivepathname = $archivepathname;
        }
        return true;
    }

    $messages = array(
        ZIPARCHIVE::ER_EXISTS => 'File already exists.',
        ZIPARCHIVE::ER_INCONS => 'Zip archive inconsistent.',
        ZIPARCHIVE::ER_INVAL  => 'Invalid argument.',
        ZIPARCHIVE::ER_MEMORY => 'Malloc failure.',
        ZIPARCHIVE::ER_NOENT  => 'No such file.',
        ZIPARCHIVE::ER_NOZIP  => 'Not a zip archive.',
        ZIPARCHIVE::ER_OPEN   => 'Can\'t open file.',
        ZIPARCHIVE::ER_READ   => 'Read error.',
        ZIPARCHIVE::ER_SEEK   => 'Seek error.',
    );
    $message = (is_int($result) && isset($messages[$result])) ? $messages[$result] : 'Unknown error.';
    debugging($message.': '.$archivepathname, DEBUG_DEVELOPER);

    // Every other method treats an unset $this->za as "not opened".
    $this->za = null;
    $this->archivepathname = null;
    return false;
}
/**
 * Normalize $localname, always keep in utf-8 encoding.
 *
 * NOTE(review): the body of the '.' check and the final return were not visible in the
 * reviewed listing; callers test the result against '', so '.' is mapped to '' here.
 *
 * @param string $localname name of file in utf-8 encoding
 * @return string normalised compressed file or directory name
 */
protected function mangle_pathname($localname) {
    // No MS \ separators.
    $result = str_replace('\\', '/', $localname);
    // Cleanup any potential ../ transversal (any number of dots).
    $result = preg_replace('/\.\.+\//', '', $result);
    // Join together any number of consecutive dots.
    $result = preg_replace('/\.\.+/', '.', $result);
    // No leading slash.
    $result = ltrim($result, '/');

    if ($result === '.') {
        $result = '';
    }

    return $result;
}
/**
 * Tries to convert $localname into utf-8
 * please note that it may fail really badly.
 * The resulting file name is cleaned.
 *
 * @param string $localname name (encoding is read from zip file or guessed)
 * @return string in utf-8
 */
protected function unmangle_pathname($localname) {
    $this->init_namelookup();

    if (isset($this->namelookup[$localname])) {
        return $this->namelookup[$localname];
    }

    // This should not happen.
    // NOTE(review): the fallback starts from the raw name; the assignment was not visible in
    // the reviewed listing although $name is read immediately afterwards.
    $name = $localname;
    if (!empty($this->encoding) && $this->encoding !== 'utf-8') {
        $name = @core_text::convert($name, $this->encoding, 'utf-8');
    }
    // No MS \ separators.
    $name = str_replace('\\', '/', $name);
    // Only safe chars.
    $name = clean_param($name, PARAM_PATH);
    // No leading slash.
    return ltrim($name, '/');
}
/**
 * Close archive, write changes to disk.
 *
 * NOTE(review): the statements that close/unset $this->za, reset $this->mode and return
 * were not visible in the reviewed listing; they are restored so that every other method's
 * isset($this->za) guard reports "not opened" afterwards - confirm against upstream.
 *
 * @return bool success
 */
public function close() {
    if (!isset($this->za)) {
        return false;
    }

    if ($this->emptyziphack) {
        // The handle points at a scratch file standing in for an empty archive; discard it.
        @$this->za->close();
        $this->za = null;
        $this->mode = null;
        $this->namelookup = null;
        $this->modified = false;
        @unlink($this->emptyziphack);
        $this->emptyziphack = false;
        return true;
    }

    if ($this->za->numFiles == 0) {
        // PHP can not create empty archives, so let's fake it.
        @$this->za->close();
        $this->za = null;
        $this->mode = null;
        $this->namelookup = null;
        $this->modified = false;
        // If the existing archive is already empty, we didn't change it. Don't bother completing a save.
        // This is important when we are inspecting archives that we might not have write permission to.
        if (@filesize($this->archivepathname) == 22 &&
                @file_get_contents($this->archivepathname) === base64_decode(self::$emptyzipcontent)) {
            return true;
        }
        @unlink($this->archivepathname);
        $data = base64_decode(self::$emptyzipcontent);
        if (!file_put_contents($this->archivepathname, $data)) {
            return false;
        }
        return true;
    }

    $res = $this->za->close();
    $this->za = null;
    $this->mode = null;
    $this->namelookup = null;

    if ($this->modified) {
        // Mark UTF-8 entry names in the file that was just written.
        $this->fix_utf8_flags();
        $this->modified = false;
    }

    return $res;
}
/**
 * Returns file stream for reading of content.
 *
 * @param int $index index of file
 * @return resource|bool file handle or false if error
 */
public function get_stream($index) {
    if (!isset($this->za)) {
        return false;
    }

    // ZipArchive::getStream() is addressed by entry name, so resolve the index first.
    $name = $this->za->getNameIndex($index);
    if ($name === false) {
        return false;
    }

    return $this->za->getStream($name);
}
/**
 * Extract the archive contents to the given location.
 *
 * Only the single entry identified by $index is extracted.
 *
 * @param string $destination Path to the location where to extract the files.
 * @param int $index Index of the archive entry.
 * @return bool true on success or false on failure
 */
public function extract_to($destination, $index) {
    if (!isset($this->za)) {
        return false;
    }

    $name = $this->za->getNameIndex($index);
    if ($name === false) {
        return false;
    }

    return $this->za->extractTo($destination, $name);
}
/**
 * Returns file information.
 *
 * The returned object carries index, original_pathname (raw name from the archive),
 * pathname (utf-8, cleaned), mtime, is_directory and size. Entries recognised by
 * is_system_file() are reported as false.
 *
 * @param int $index index of file
 * @return stdClass|bool info object or false if error
 */
public function get_info($index) {
    if (!isset($this->za)) {
        return false;
    }

    // Need to use the ZipArchive's numfiles, as $this->count() relies on this function to count actual files (skipping OSX junk).
    if ($index < 0 || $index >= $this->za->numFiles) {
        return false;
    }

    // PHP 5.6 introduced encoding guessing logic for file names. To keep consistent behaviour with older versions,
    // we fall back to obtaining file names as raw unmodified strings.
    $result = $this->za->statIndex($index, ZipArchive::FL_ENC_RAW);
    if ($result === false) {
        return false;
    }

    $info = new stdClass();
    $info->index             = $index;
    $info->original_pathname = $result['name'];
    $info->pathname          = $this->unmangle_pathname($result['name']);
    $info->mtime             = (int)$result['mtime'];

    // Use substr() rather than indexing the last byte: the cleaned name may be empty, and
    // reading string offset -1 of an empty string raises a warning.
    if (substr($info->pathname, -1) === '/') {
        $info->is_directory = true;
        // NOTE(review): directory size was not visible in the reviewed listing; 0 assumed.
        $info->size         = 0;
    } else {
        $info->is_directory = false;
        $info->size         = (int)$result['size'];
    }

    if ($this->is_system_file($info)) {
        // Don't return system files.
        return false;
    }

    return $info;
}
/**
 * Returns array of info about all files in archive.
 *
 * @return array|bool array of file infos, or false when no archive is open
 */
public function list_files() {
    if (!isset($this->za)) {
        return false;
    }

    $infos = array();

    // Simply iterating over $this will give us info only for files we're interested in.
    foreach ($this as $info) {
        $infos[] = $info;
    }

    return $infos;
}
/**
 * Decides whether an archive entry is operating system clutter rather than real content.
 *
 * Matches names starting with '__MACOSX', names ending with '.DS_Store', and names ending
 * with 'Thumbs.db' whose first 8 bytes are the OLE Compound File signature.
 *
 * @param stdClass $fileinfo info object with a pathname property
 * @return bool true when the entry is a system file
 */
public function is_system_file($fileinfo) {
    if (substr($fileinfo->pathname, 0, 8) === '__MACOSX' || substr($fileinfo->pathname, -9) === '.DS_Store') {
        // Mac OSX system files.
        return true;
    }

    if (substr($fileinfo->pathname, -9) === 'Thumbs.db') {
        $stream = $this->za->getStream($fileinfo->pathname);
        if (!is_resource($stream)) {
            // getStream() returns false when the entry can not be opened; without a
            // readable header the file can not be identified as a thumbnail cache.
            return false;
        }
        $info = base64_encode((string)fread($stream, 8));
        fclose($stream);
        if ($info === '0M8R4KGxGuE=') {
            // It's an OLE Compound File - so it's almost certainly a Windows thumbnail cache.
            return true;
        }
    }

    return false;
}
/**
 * Returns number of files in archive.
 *
 * Counts the entries returned by list_files(), i.e. without system files.
 *
 * @return int|bool number of files, or false when no archive is open
 */
public function count() {
    if (!isset($this->za)) {
        return false;
    }

    return count($this->list_files());
}
/**
 * Returns approximate number of files in archive. This may be a slight
 * overestimate: it is the raw entry count reported by ZipArchive, which still
 * includes the system files that count() leaves out.
 *
 * @return int|bool Estimated number of files, or false if not opened
 */
public function estimated_count() {
    if (!isset($this->za)) {
        return false;
    }

    return $this->za->numFiles;
}
/**
 * Add file into archive.
 *
 * @param string $localname name of file in archive
 * @param string $pathname location of file
 * @return bool success
 */
public function add_file_from_pathname($localname, $pathname) {
    if ($this->emptyziphack) {
        // Swap the scratch archive for the real one before anything is written.
        // NOTE(review): the close() call was not visible in the reviewed listing.
        $this->close();
        $this->open($this->archivepathname, file_archive::OVERWRITE, $this->encoding);
    }

    if (!isset($this->za)) {
        return false;
    }

    if ($this->archivepathname === realpath($pathname)) {
        // Do not add self into archive.
        return false;
    }

    if (!is_readable($pathname) || is_dir($pathname)) {
        return false;
    }

    if (is_null($localname)) {
        $localname = clean_param($pathname, PARAM_PATH);
    }
    // No leading slashes in archives!
    $localname = trim($localname, '/');
    $localname = $this->mangle_pathname($localname);

    if ($localname === '') {
        // Sorry - conversion failed badly.
        return false;
    }

    if (!$this->za->addFile($pathname, $localname)) {
        return false;
    }

    $this->modified = true;
    return true;
}
/**
 * Add content of string into archive.
 *
 * @param string $localname name of file in archive
 * @param string $contents contents
 * @return bool success
 */
public function add_file_from_string($localname, $contents) {
    if ($this->emptyziphack) {
        // Swap the scratch archive for the real one before anything is written.
        // NOTE(review): the close() call was not visible in the reviewed listing.
        $this->close();
        $this->open($this->archivepathname, file_archive::OVERWRITE, $this->encoding);
    }

    if (!isset($this->za)) {
        return false;
    }

    // No leading slashes in archives!
    $localname = trim($localname, '/');
    $localname = $this->mangle_pathname($localname);

    if ($localname === '') {
        // Sorry - conversion failed badly.
        return false;
    }

    if ($this->usedmem > 2097151) {
        // This prevents running out of memory when adding many large files using strings.
        // NOTE(review): the close() call and the $res check were not visible in the reviewed
        // listing; reopening is what resets the usedmem counter.
        $this->close();
        $res = $this->open($this->archivepathname, file_archive::OPEN, $this->encoding);
        if ($res !== true) {
            print_error('cannotopenzip');
        }
    }
    $this->usedmem += strlen($contents);

    if (!$this->za->addFromString($localname, $contents)) {
        return false;
    }

    $this->modified = true;
    return true;
}
/**
 * Add empty directory into archive.
 *
 * @param string $localname name of file in archive
 * @return bool success
 */
public function add_directory($localname) {
    if ($this->emptyziphack) {
        // Swap the scratch archive for the real one before anything is written.
        // NOTE(review): the close() call was not visible in the reviewed listing.
        $this->close();
        $this->open($this->archivepathname, file_archive::OVERWRITE, $this->encoding);
    }

    if (!isset($this->za)) {
        return false;
    }

    // Directory entries always end with exactly one slash.
    $localname = trim($localname, '/'). '/';
    $localname = $this->mangle_pathname($localname);

    if ($localname === '/') {
        // Sorry - conversion failed badly.
        return false;
    }

    if ($localname !== '') {
        if (!$this->za->addEmptyDir($localname)) {
            return false;
        }
        $this->modified = true;
    }

    return true;
}
/**
 * Returns current file info.
 *
 * @return stdClass|bool info object as produced by get_info(), or false
 */
public function current() {
    if (!isset($this->za)) {
        return false;
    }

    return $this->get_info($this->pos);
}
/**
 * Returns the index of current file.
 *
 * NOTE(review): the body was not visible in the reviewed listing; restored from the
 * documented contract and the use of $this->pos in current() and valid().
 *
 * @return int current file index
 */
public function key() {
    return $this->pos;
}
/**
 * Moves forward to next file.
 *
 * NOTE(review): the body was not visible in the reviewed listing; restored from the
 * documented contract and the use of $this->pos in current() and valid().
 */
public function next() {
    $this->pos++;
}
/**
 * Rewinds back to the first file.
 *
 * NOTE(review): the body was not visible in the reviewed listing; restored from the
 * documented contract and the use of $this->pos in current() and valid().
 */
public function rewind() {
    $this->pos = 0;
}
/**
 * Did we reach the end?
 *
 * As a side effect the position is moved past entries for which get_info() returns false.
 *
 * @return bool true while the current position refers to a reportable entry
 */
public function valid() {
    if (!isset($this->za)) {
        return false;
    }

    // Skip over unwanted system files (get_info will return false).
    while (!$this->get_info($this->pos) && $this->pos < $this->za->numFiles) {
        $this->next();
    }

    // No files left - we're at the end.
    if ($this->pos >= $this->za->numFiles) {
        return false;
    }

    return true;
}
/**
 * Create a map of file names used in zip archive.
 *
 * Fills $this->namelookup with raw entry name => cleaned utf-8 name by parsing the central
 * directory of the archive file directly. For non-ASCII names without the utf-8 flag
 * (general purpose bit 11) the following sources are tried in order: the 0x7075 unicode
 * path extra field, the encoding passed to open(), plain utf-8 for creator versions 0x315
 * and 0, and finally the language pack charsets.
 *
 * NOTE(review): the $pos/$files initialisation, the $found/$windows bookkeeping, the
 * "default" charset branch, the fclose() calls and all return statements were not visible
 * in the reviewed listing; they are restored from the way the visible code reads those
 * variables - confirm against upstream.
 *
 * @return bool success
 */
protected function init_namelookup() {
    if ($this->emptyziphack) {
        $this->namelookup = array();
        return true;
    }

    if (!isset($this->za)) {
        return false;
    }
    if (isset($this->namelookup)) {
        // Already built for this archive.
        return true;
    }

    $this->namelookup = array();

    if ($this->mode != file_archive::OPEN) {
        // No need to tweak existing names when creating zip file because there are none yet!
        return true;
    }

    if (!file_exists($this->archivepathname)) {
        return false;
    }

    if (!$fp = fopen($this->archivepathname, 'rb')) {
        return false;
    }
    if (!$filesize = filesize($this->archivepathname)) {
        // Do not leak the handle opened above.
        fclose($fp);
        return false;
    }

    $centralend = self::zip_get_central_end($fp, $filesize);

    if ($centralend === false || $centralend['disk'] !== 0 || $centralend['disk_start'] !== 0
            || $centralend['offset'] === 0xFFFFFFFF) {
        // Single disk archives only and no support for ZIP64, sorry.
        fclose($fp);
        return false;
    }

    fseek($fp, $centralend['offset']);
    $data = fread($fp, $centralend['size']);
    $pos = 0;
    $files = array();
    for ($i = 0; $i < $centralend['entries']; $i++) {
        $file = self::zip_parse_file_header($data, $centralend, $pos);
        if ($file === false) {
            // Wrong header, sorry.
            fclose($fp);
            return false;
        }
        $files[] = $file;
    }
    fclose($fp);

    // General purpose bit 11: the entry name is stored as utf-8.
    $utf8flag = 0x0800;

    // DOS code pages assumed for archives that look like they were made on Windows.
    $doscharsets = array(
        'ISO-8859-1'   => 'CP850',
        'ISO-8859-2'   => 'CP852',
        'ISO-8859-4'   => 'CP775',
        'ISO-8859-5'   => 'CP866',
        'ISO-8859-6'   => 'CP720',
        'ISO-8859-7'   => 'CP737',
        'ISO-8859-8'   => 'CP862',
        'WINDOWS-1251' => 'CP866',
    );

    foreach ($files as $file) {
        $name = $file['name'];
        if (preg_match('/^[a-zA-Z0-9_\-\.]*$/', $file['name'])) {
            // No need to fix ASCII.
            $name = fix_utf8($name);

        } else if (!($file['general'] & $utf8flag)) {
            // First look for unicode name alternatives.
            $found = false;
            foreach ($file['extra'] as $extra) {
                // A usable unicode path field holds 1 version byte, a 4 byte crc and the name.
                if ($extra['id'] === 0x7075 && strlen($extra['data']) >= 5) {
                    $unicode = unpack('cversion/Vcrc', substr($extra['data'], 0, 5));
                    if ($unicode['crc'] === crc32($name)) {
                        $found = true;
                        $name = substr($extra['data'], 5);
                    }
                }
            }

            if (!$found && !empty($this->encoding) && $this->encoding !== 'utf-8') {
                // Try the encoding from open().
                $newname = @core_text::convert($name, $this->encoding, 'utf-8');
                $original = core_text::convert($newname, 'utf-8', $this->encoding);
                if ($original === $name) {
                    // Only accept a conversion that round-trips.
                    $found = true;
                    $name = $newname;
                }
            }

            if (!$found && $file['version'] === 0x315) {
                // This looks like OS X build in zipper.
                $newname = fix_utf8($name);
                if ($newname === $name) {
                    $found = true;
                    $name = $newname;
                }
            }

            if (!$found && $file['version'] === 0) {
                // This looks like our old borked Moodle 2.2 file.
                $newname = fix_utf8($name);
                if ($newname === $name) {
                    $found = true;
                    $name = $newname;
                }
            }

            if (!$found) {
                $encoding = get_string('oldcharset', 'langconfig');
                if ($encoding) {
                    // Last attempt - try the dos/unix encoding from current language.
                    $windows = true;
                    foreach ($file['extra'] as $extra) {
                        // In Windows archivers do not usually set any extras with the exception of NTFS flag in WinZip/WinRar.
                        $windows = false;
                        if ($extra['id'] === 0x000a) {
                            $windows = true;
                            break;
                        }
                    }

                    if ($windows === true) {
                        $upper = strtoupper($encoding);
                        if (isset($doscharsets[$upper])) {
                            $encoding = $doscharsets[$upper];
                        } else if ($winchar = get_string('localewincharset', 'langconfig')) {
                            // Most probably works only for zh_cn,
                            // if there are more problems we could add zipcharset to langconfig files.
                            $encoding = $winchar;
                        }
                    }

                    $newname = @core_text::convert($name, $encoding, 'utf-8');
                    $original = core_text::convert($newname, 'utf-8', $encoding);

                    if ($original === $name) {
                        $name = $newname;
                    }
                }
            }
        }

        // No MS \ separators.
        $name = str_replace('\\', '/', $name);
        // Only safe chars.
        $name = clean_param($name, PARAM_PATH);
        // No leading slash.
        $name = ltrim($name, '/');

        if (function_exists('normalizer_normalize')) {
            $name = normalizer_normalize($name, Normalizer::FORM_C);
        }

        $this->namelookup[$file['name']] = $name;
    }

    return true;
}
/**
 * Add unicode flag to all files in archive.
 *
 * Sets general purpose bit 11 in both the central and the local header of every entry
 * whose name is non-ASCII, valid utf-8, and carries no extra fields.
 *
 * NOTE: single disk archives only, no ZIP64 support.
 *
 * NOTE(review): the $pos/$files initialisation, the "continue" statements, the collection
 * of $files, the fclose() calls and all return statements were not visible in the reviewed
 * listing; they are restored from the visible comments and variable use - confirm against
 * upstream.
 *
 * @return bool success, modifies the file contents
 */
protected function fix_utf8_flags() {
    if ($this->emptyziphack) {
        return true;
    }

    if (!file_exists($this->archivepathname)) {
        return true;
    }

    // Note: the ZIP structure is described at http://www.pkware.com/documents/casestudies/APPNOTE.TXT
    if (!$fp = fopen($this->archivepathname, 'rb+')) {
        return false;
    }
    if (!$filesize = filesize($this->archivepathname)) {
        // Do not leak the handle opened above.
        fclose($fp);
        return false;
    }

    $centralend = self::zip_get_central_end($fp, $filesize);

    if ($centralend === false || $centralend['disk'] !== 0 || $centralend['disk_start'] !== 0
            || $centralend['offset'] === 0xFFFFFFFF) {
        // Single disk archives only and no support for ZIP64, sorry.
        fclose($fp);
        return false;
    }

    // General purpose bit 11: the entry name is stored as utf-8.
    $utf8flag = 0x0800;

    fseek($fp, $centralend['offset']);
    $data = fread($fp, $centralend['size']);
    $pos = 0;
    $files = array();
    for ($i = 0; $i < $centralend['entries']; $i++) {
        $file = self::zip_parse_file_header($data, $centralend, $pos);
        if ($file === false) {
            // Wrong header, sorry.
            fclose($fp);
            return false;
        }

        $newgeneral = $file['general'] | $utf8flag;
        if ($newgeneral === $file['general']) {
            // Nothing to do with this file.
            continue;
        }

        if (preg_match('/^[a-zA-Z0-9_\-\.]*$/', $file['name'])) {
            // ASCII file names are always ok.
            continue;
        }
        if ($file['extra']) {
            // Most probably not created by php zip ext, better to skip it.
            continue;
        }
        if (fix_utf8($file['name']) !== $file['name']) {
            // Does not look like a valid utf-8 encoded file name, skip it.
            continue;
        }

        // Read local file header.
        fseek($fp, $file['local_offset']);
        $rawlocal = fread($fp, 30);
        if ($rawlocal === false || strlen($rawlocal) < 30) {
            // Truncated local header, unpack() would fail.
            fclose($fp);
            return false;
        }
        $localfile = unpack('Vsig/vversion_req/vgeneral/vmethod/vmtime/vmdate/Vcrc/Vsize_compressed/Vsize/vname_length/vextra_length', $rawlocal);
        if ($localfile['sig'] !== 0x04034b50) {
            // Borked file!
            fclose($fp);
            return false;
        }

        $file['local'] = $localfile;
        $files[] = $file;
    }

    // Patch the headers only after every candidate has been validated.
    foreach ($files as $file) {
        $localfile = $file['local'];
        // Add the unicode flag in central file header.
        fseek($fp, $file['central_offset'] + 8);
        if (ftell($fp) === $file['central_offset'] + 8) {
            $newgeneral = $file['general'] | $utf8flag;
            fwrite($fp, pack('v', $newgeneral));
        }
        // Modify local file header too.
        fseek($fp, $file['local_offset'] + 6);
        if (ftell($fp) === $file['local_offset'] + 6) {
            $newgeneral = $localfile['general'] | $utf8flag;
            fwrite($fp, pack('v', $newgeneral));
        }
    }

    fclose($fp);
    return true;
}
/**
 * Read end of central signature of ZIP file.
 *
 * Locates the 22 byte "end of central directory" record (signature 0x06054b50), which may
 * be followed by an archive comment, and unpacks it.
 *
 * @param resource $fp seekable handle of the archive, opened for reading
 * @param int $filesize size of the archive in bytes
 * @return array|bool record with keys sig, disk, disk_start, disk_entries, entries, size,
 *      offset, comment_length and comment; false when the record can not be found
 */
public static function zip_get_central_end($fp, $filesize) {
    $recordsize = 22;
    $signature = pack('V', 0x06054b50);

    if ($filesize < $recordsize) {
        // Too small to hold the record at all; seeking to a negative offset would fail.
        return false;
    }

    // Find end of central directory record.
    fseek($fp, $filesize - $recordsize);
    $data = fread($fp, $recordsize);
    if ($data === false) {
        return false;
    }

    if (strncmp($data, $signature, 4) === 0) {
        // There is no comment.
        $pos = 0;
    } else {
        // There is some comment with 0xFFFF max size - that is 65557 bytes together with the record.
        // Clamp the window to the file size so that small archives with a comment are read too.
        $window = min($filesize, 65557);
        fseek($fp, $filesize - $window);
        $data = fread($fp, $window);
        if ($data === false) {
            return false;
        }
        // The record is the last structure of the file, so the last signature is the one wanted
        // (an earlier match may belong to a zip file stored inside this archive).
        $pos = strrpos($data, $signature);
        if ($pos === false) {
            // Borked ZIP structure!
            return false;
        }
    }

    $record = substr($data, $pos, $recordsize);
    if (strlen($record) < $recordsize) {
        // Signature found too close to the end of file to be a complete record.
        return false;
    }

    $centralend = unpack('Vsig/vdisk/vdisk_start/vdisk_entries/ventries/Vsize/Voffset/vcomment_length', $record);
    if ($centralend['comment_length']) {
        // The comment follows the record, wherever the record was found in the buffer.
        $centralend['comment'] = (string)substr($data, $pos + $recordsize, $centralend['comment_length']);
    } else {
        $centralend['comment'] = '';
    }

    return $centralend;
}
/**
 * Parse one file header of the central directory.
 *
 * Reads the 46 byte fixed part at $pos followed by the variable length name, extra fields
 * and comment, and moves $pos to the start of the next header.
 *
 * NOTE(review): the first docblock lines, the step over the fixed part ($pos + 46) and
 * the return statements were not visible in the reviewed listing; they are restored from
 * the visible reads at $pos and the documented return type.
 *
 * @param string $data raw central directory
 * @param array $centralend end of central directory record, its 'offset' key is used
 * @param int $pos (modified) offset of the header inside $data
 * @return array|bool file info or false when the header is truncated or has a wrong signature
 */
public static function zip_parse_file_header($data, $centralend, &$pos) {
    $fixedsize = 46;

    $header = substr($data, $pos, $fixedsize);
    if ($header === false || strlen($header) < $fixedsize) {
        // Truncated central directory, unpack() would fail.
        return false;
    }

    $file = unpack('Vsig/vversion/vversion_req/vgeneral/vmethod/Vmodified/Vcrc/Vsize_compressed/Vsize/vname_length/vextra_length/vcomment_length/vdisk/vattr/Vattrext/Vlocal_offset', $header);
    // Absolute position of this header in the archive file.
    $file['central_offset'] = $centralend['offset'] + $pos;
    $pos = $pos + $fixedsize;
    if ($file['sig'] !== 0x02014b50) {
        // Borked ZIP structure!
        return false;
    }

    $file['name'] = (string)substr($data, $pos, $file['name_length']);
    $pos = $pos + $file['name_length'];

    $file['extra'] = array();
    $file['extra_data'] = '';
    if ($file['extra_length']) {
        $extradata = (string)substr($data, $pos, $file['extra_length']);
        $file['extra_data'] = $extradata;
        // Each extra field is a 2 byte id, a 2 byte size and then size bytes of data. A field
        // with no data is exactly 4 bytes long, hence >= rather than >.
        while (strlen($extradata) >= 4) {
            $extra = unpack('vid/vsize', substr($extradata, 0, 4));
            $extra['data'] = (string)substr($extradata, 4, $extra['size']);
            $extradata = (string)substr($extradata, 4 + $extra['size']);
            $file['extra'][] = $extra;
        }
        $pos = $pos + $file['extra_length'];
    }

    if ($file['comment_length']) {
        // Read the comment before stepping over it; doing it the other way round returns
        // the bytes that follow the comment instead.
        $file['comment'] = (string)substr($data, $pos, $file['comment_length']);
        $pos = $pos + $file['comment_length'];
    } else {
        $file['comment'] = '';
    }

    return $file;
}