Merge branch 'MDL-57455_master-fix' of https://github.com/markn86/moodle
[moodle.git] / lib / filestorage / zip_archive.php
blob213131436421bcf1dc29d502d1019d48a485ed3a
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * Implementation of zip file archive.
 *
 * @package   core_files
 * @copyright 2008 Petr Skoda (http://skodak.org)
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
25 defined('MOODLE_INTERNAL') || die();
27 require_once("$CFG->libdir/filestorage/file_archive.php");
/**
 * Zip file archive class.
 *
 * @package   core_files
 * @category  files
 * @copyright 2008 Petr Skoda (http://skodak.org)
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
37 class zip_archive extends file_archive {
/** @var string|null Path of the archive file, null while no archive is open. */
protected $archivepathname = null;

/** @var int|null Mode passed to open() (file_archive::OPEN, CREATE or OVERWRITE). */
protected $mode = null;

/** @var int Bytes added through add_file_from_string() since the archive was last (re)opened. */
protected $usedmem = 0;

/** @var int Index of the entry the iterator methods currently point at. */
protected $pos = 0;

/** @var ZipArchive Underlying PHP zip handle, unset while no archive is open. */
protected $za;

/** @var bool True once something has been added to the archive. */
protected $modified = false;

/** @var array|null Map of raw entry names to cleaned utf-8 names, built lazily by init_namelookup(). */
protected $namelookup = null;

/** @var string Base64 encoded contents of an empty zip file (22 bytes once decoded). */
protected static $emptyzipcontent = 'UEsFBgAAAAAAAAAAAAAAAAAAAAAAAA==';

/**
 * @var string|bool Ugly hack for broken empty zip handling in < PHP 5.3.10:
 *      path of the temporary stand-in archive, or false when the hack is not active.
 */
protected $emptyziphack = false;
/**
 * Create new zip_archive instance.
 *
 * No encoding is preset; a null encoding means the encoding is autodetected.
 */
public function __construct() {
    // Autodetects encoding by default.
    $this->encoding = null;
}
/**
 * Open or create archive (depending on $mode).
 *
 * Any previously opened archive is closed first. On failure a developer
 * debugging message describing the ZipArchive error code is emitted.
 *
 * @todo MDL-31048 return error message
 * @param string $archivepathname path of the zip file
 * @param int $mode OPEN, CREATE or OVERWRITE constant
 * @param string $encoding archive local paths encoding, empty means autodetect
 * @return bool success
 */
public function open($archivepathname, $mode=file_archive::CREATE, $encoding=null) {
    $this->close();

    $this->usedmem  = 0;
    $this->pos      = 0;
    $this->encoding = $encoding;
    $this->mode     = $mode;

    $this->za = new ZipArchive();

    // Translate our mode into ZipArchive flags.
    if ($mode == file_archive::OPEN) {
        $flags = 0;
    } else if ($mode == file_archive::OVERWRITE) {
        $flags = ZIPARCHIVE::CREATE | ZIPARCHIVE::OVERWRITE; // Changed in PHP 5.2.8.
    } else {
        // file_archive::CREATE and any unknown value.
        $flags = ZIPARCHIVE::CREATE;
    }

    $result = $this->za->open($archivepathname, $flags);

    // Legacy PHP versions < 5.3.10 can not deal with empty zip archives,
    // so a 22 byte file that was rejected gets a closer look.
    $suspectempty = ($flags == 0 && $result === ZIPARCHIVE::ER_NOZIP && filesize($archivepathname) === 22);
    if ($suspectempty && file_get_contents($archivepathname) === base64_decode(self::$emptyzipcontent)) {
        $temp = make_temp_directory('zip');
        if ($temp) {
            // Work on a temporary stand-in archive instead of the real file.
            $this->emptyziphack = tempnam($temp, 'zip');
            $this->za = new ZipArchive();
            $result = $this->za->open($this->emptyziphack, ZIPARCHIVE::CREATE);
        }
    }

    if ($result === true) {
        $this->archivepathname = file_exists($archivepathname) ? realpath($archivepathname) : $archivepathname;
        return true;
    }

    // Opening failed: report why and reset the state.
    $messages = array(
        ZIPARCHIVE::ER_EXISTS => 'File already exists.',
        ZIPARCHIVE::ER_INCONS => 'Zip archive inconsistent.',
        ZIPARCHIVE::ER_INVAL  => 'Invalid argument.',
        ZIPARCHIVE::ER_MEMORY => 'Malloc failure.',
        ZIPARCHIVE::ER_NOENT  => 'No such file.',
        ZIPARCHIVE::ER_NOZIP  => 'Not a zip archive.',
        ZIPARCHIVE::ER_OPEN   => 'Can\'t open file.',
        ZIPARCHIVE::ER_READ   => 'Read error.',
        ZIPARCHIVE::ER_SEEK   => 'Seek error.',
    );
    $message = (is_int($result) && isset($messages[$result])) ? $messages[$result] : 'Unknown error.';

    debugging($message.': '.$archivepathname, DEBUG_DEVELOPER);
    $this->za = null;
    $this->archivepathname = null;
    return false;
}
/**
 * Normalize $localname, always keep in utf-8 encoding.
 *
 * Backslashes become forward slashes, every run of two or more dots is
 * removed, leading slashes are stripped and a lone "." becomes an empty name.
 *
 * @param string $localname name of file in utf-8 encoding
 * @return string normalised compressed file or directory name
 */
protected function mangle_pathname($localname) {
    $name = str_replace('\\', '/', $localname);  // No MS \ separators.
    $name = preg_replace('/\.\.+/', '', $name);  // Prevent /.../.
    $name = ltrim($name, '/');                   // No leading slash.

    return ($name === '.') ? '' : $name;
}
/**
 * Tries to convert $localname into utf-8
 * please note that it may fail really badly.
 * The resulting file name is cleaned.
 *
 * The name is normally answered from the lookup map built by
 * init_namelookup(); names missing from the map are converted on the spot.
 *
 * @param string $localname name (encoding is read from zip file or guessed)
 * @return string in utf-8
 */
protected function unmangle_pathname($localname) {
    $this->init_namelookup();

    if (isset($this->namelookup[$localname])) {
        return $this->namelookup[$localname];
    }

    // This should not happen - fall back to a direct conversion.
    $converted = $localname;
    if (!empty($this->encoding) && $this->encoding !== 'utf-8') {
        $converted = @core_text::convert($converted, $this->encoding, 'utf-8');
    }
    $converted = str_replace('\\', '/', $converted);  // No MS \ separators.
    $converted = clean_param($converted, PARAM_PATH); // Only safe chars.

    return ltrim($converted, '/');                    // No leading slash.
}
/**
 * Close archive, write changes to disk.
 *
 * Empty archives are written by hand as the canned empty zip content.
 * After a modified, non-empty archive is closed its utf-8 flags are fixed.
 *
 * @return bool success, false also when no archive is open
 */
public function close() {
    if (!isset($this->za)) {
        return false;
    }

    $usinghack = (bool)$this->emptyziphack;
    $isempty = !$usinghack && $this->za->numFiles == 0;

    if ($usinghack || $isempty) {
        @$this->za->close();
        $this->za = null;
        $this->mode = null;
        $this->namelookup = null;
        $this->modified = false;

        if ($usinghack) {
            // Only the temporary stand-in archive has to go away.
            @unlink($this->emptyziphack);
            $this->emptyziphack = false;
            return true;
        }

        // PHP can not create empty archives, so let's fake it.
        $emptyzip = base64_decode(self::$emptyzipcontent);

        // If the existing archive is already empty, we didn't change it. Don't bother completing a save.
        // This is important when we are inspecting archives that we might not have write permission to.
        if (@filesize($this->archivepathname) == 22 && @file_get_contents($this->archivepathname) === $emptyzip) {
            return true;
        }

        @unlink($this->archivepathname);
        return (bool)file_put_contents($this->archivepathname, $emptyzip);
    }

    $success = $this->za->close();
    $this->za = null;
    $this->mode = null;
    $this->namelookup = null;

    if ($this->modified) {
        $this->fix_utf8_flags();
        $this->modified = false;
    }

    return $success;
}
/**
 * Returns file stream for reading of content.
 *
 * The entry name is resolved from the index first and the stream is then
 * requested by that name.
 *
 * @param int $index index of file
 * @return resource|bool file handle or false if error
 */
public function get_stream($index) {
    if (!isset($this->za)) {
        return false;
    }

    $entryname = $this->za->getNameIndex($index);

    return ($entryname === false) ? false : $this->za->getStream($entryname);
}
/**
 * Returns file information.
 *
 * The returned object carries index, original_pathname (raw name stored in
 * the archive), pathname (cleaned utf-8 name), mtime, is_directory and size.
 * False is returned when no archive is open, the index is out of range,
 * the entry can not be read, or the entry is a system file.
 *
 * @param int $index index of file
 * @return stdClass|bool info object or false if error
 */
public function get_info($index) {
    if (!isset($this->za)) {
        return false;
    }

    // Need to use the ZipArchive's numfiles, as $this->count() relies on this function to count actual files (skipping OSX junk).
    if ($index < 0 || $index >= $this->za->numFiles) {
        return false;
    }

    // PHP 5.6 introduced encoding guessing logic, we need to fall back
    // to raw ZIP_FL_ENC_RAW (== 64) to get consistent results as in PHP 5.5.
    $result = $this->za->statIndex($index, 64);

    if ($result === false) {
        return false;
    }

    $info = new stdClass();
    $info->index             = $index;
    $info->original_pathname = $result['name'];
    $info->pathname          = $this->unmangle_pathname($result['name']);
    $info->mtime             = (int)$result['mtime'];

    // Cleaning may leave an empty pathname; substr() copes with that, whereas
    // indexing the last character of an empty string is an invalid offset.
    if (substr($info->pathname, -1) === '/') {
        $info->is_directory = true;
        $info->size         = 0;
    } else {
        $info->is_directory = false;
        $info->size         = (int)$result['size'];
    }

    if ($this->is_system_file($info)) {
        // Don't return system files.
        return false;
    }

    return $info;
}
/**
 * Returns array of info about all files in archive.
 *
 * @return array|bool list of file info objects, false when no archive is open
 */
public function list_files() {
    if (!isset($this->za)) {
        return false;
    }

    $infos = array();

    // Simply iterating over $this will give us info only for files we're interested in.
    foreach ($this as $info) {
        $infos[] = $info;
    }

    return $infos;
}
/**
 * Tells whether an archive entry is an operating system junk file.
 *
 * Entries under __MACOSX and .DS_Store files are always system files.
 * A Thumbs.db entry counts only when its first 8 bytes match the
 * OLE Compound File signature.
 *
 * @param stdClass $fileinfo info object with at least a pathname property
 * @return bool true for system files, false otherwise
 */
public function is_system_file($fileinfo) {
    if (substr($fileinfo->pathname, 0, 8) === '__MACOSX' || substr($fileinfo->pathname, -9) === '.DS_Store') {
        // Mac OSX system files.
        return true;
    }

    if (substr($fileinfo->pathname, -9) === 'Thumbs.db') {
        if (!isset($this->za)) {
            // Nothing is open, the content can not be inspected.
            return false;
        }

        $stream = $this->za->getStream($fileinfo->pathname);
        if (!is_resource($stream)) {
            // The entry could not be opened, so it can not be proven to be a thumbnail cache.
            return false;
        }

        $header = fread($stream, 8);
        fclose($stream);

        if ($header !== false && base64_encode($header) === '0M8R4KGxGuE=') {
            // It's an OLE Compound File - so it's almost certainly a Windows thumbnail cache.
            return true;
        }
    }

    return false;
}
/**
 * Returns number of files in archive.
 *
 * Counts the entries returned by list_files().
 *
 * @return int|bool number of files, false when no archive is open
 */
public function count() {
    return isset($this->za) ? count($this->list_files()) : false;
}
/**
 * Returns approximate number of files in archive. This may be a slight
 * overestimate.
 *
 * The value is the raw entry count reported by ZipArchive.
 *
 * @return int|bool Estimated number of files, or false if not opened
 */
public function estimated_count() {
    return isset($this->za) ? $this->za->numFiles : false;
}
/**
 * Add file into archive.
 *
 * Refuses to add the archive to itself, unreadable files and directories.
 *
 * @param string $localname name of file in archive, null derives it from $pathname
 * @param string $pathname location of file
 * @return bool success
 */
public function add_file_from_pathname($localname, $pathname) {
    if ($this->emptyziphack) {
        // Leave the temporary stand-in and start over with the real archive.
        $this->close();
        $this->open($this->archivepathname, file_archive::OVERWRITE, $this->encoding);
    }

    if (!isset($this->za)) {
        return false;
    }

    // Do not add self into archive, and only accept readable regular files.
    if ($this->archivepathname === realpath($pathname) || !is_readable($pathname) || is_dir($pathname)) {
        return false;
    }

    $entryname = is_null($localname) ? clean_param($pathname, PARAM_PATH) : $localname;
    $entryname = trim($entryname, '/'); // No leading slashes in archives!
    $entryname = $this->mangle_pathname($entryname);

    // An empty name means the conversion failed badly.
    if ($entryname === '' || !$this->za->addFile($pathname, $entryname)) {
        return false;
    }

    $this->modified = true;
    return true;
}
/**
 * Add content of string into archive.
 *
 * Once more than 2097151 bytes have been added this way the archive is
 * closed and reopened before adding more.
 *
 * @param string $localname name of file in archive
 * @param string $contents contents
 * @return bool success
 */
public function add_file_from_string($localname, $contents) {
    if ($this->emptyziphack) {
        // Leave the temporary stand-in and start over with the real archive.
        $this->close();
        $this->open($this->archivepathname, file_archive::OVERWRITE, $this->encoding);
    }

    if (!isset($this->za)) {
        return false;
    }

    $entryname = $this->mangle_pathname(trim($localname, '/')); // No leading slashes in archives!
    if ($entryname === '') {
        // Sorry - conversion failed badly.
        return false;
    }

    if ($this->usedmem > 2097151) {
        // This prevents running out of memory when adding many large files using strings.
        $this->close();
        if ($this->open($this->archivepathname, file_archive::OPEN, $this->encoding) !== true) {
            print_error('cannotopenzip');
        }
    }
    $this->usedmem += strlen($contents);

    if (!$this->za->addFromString($entryname, $contents)) {
        return false;
    }

    $this->modified = true;
    return true;
}
/**
 * Add empty directory into archive.
 *
 * A name that normalises to an empty string adds nothing and still
 * reports success.
 *
 * @param string $localname name of file in archive
 * @return bool success
 */
public function add_directory($localname) {
    if ($this->emptyziphack) {
        // Leave the temporary stand-in and start over with the real archive.
        $this->close();
        $this->open($this->archivepathname, file_archive::OVERWRITE, $this->encoding);
    }

    if (!isset($this->za)) {
        return false;
    }

    $dirname = $this->mangle_pathname(trim($localname, '/') . '/');

    if ($dirname === '/') {
        // Sorry - conversion failed badly.
        return false;
    }

    if ($dirname === '') {
        // Nothing to add.
        return true;
    }

    if (!$this->za->addEmptyDir($dirname)) {
        return false;
    }

    $this->modified = true;
    return true;
}
/**
 * Returns current file info.
 *
 * @return stdClass|bool info of the entry at the current position, false on failure
 */
public function current() {
    return isset($this->za) ? $this->get_info($this->pos) : false;
}
/**
 * Returns the index of current file.
 *
 * @return int current file index
 */
public function key() {
    $index = $this->pos;
    return $index;
}
/**
 * Moves forward to next file.
 *
 * Only the position is advanced; skipping of unwanted entries is done by valid().
 */
public function next() {
    $this->pos = $this->pos + 1;
}
/**
 * Rewinds back to the first file.
 *
 * Resets the iteration position to index 0.
 */
public function rewind() {
    $this->pos = 0;
}
/**
 * Did we reach the end?
 *
 * Entries for which get_info() returns false are skipped, so the position
 * may be advanced by this call.
 *
 * @return bool true while the position points at a usable entry
 */
public function valid() {
    if (!isset($this->za)) {
        return false;
    }

    // Skip over unwanted system files (get_info will return false).
    for (;;) {
        if ($this->get_info($this->pos)) {
            break;
        }
        if ($this->pos >= $this->za->numFiles) {
            break;
        }
        $this->next();
    }

    // No files left means we're at the end.
    return $this->pos < $this->za->numFiles;
}
/**
 * Create a map of file names used in zip archive.
 *
 * For archives opened in OPEN mode the central directory is parsed and every
 * raw entry name is mapped to a cleaned utf-8 name in $this->namelookup.
 * Names not flagged as utf-8 are decoded by trying, in order: the unicode
 * path extra field (0x7075), the encoding given to open(), plain utf-8 for
 * OS X and old Moodle 2.2 archives, and finally the language's old charset.
 * On any parsing problem the map is left empty or partially filled.
 *
 * NOTE: single disk archives only, no ZIP64 support.
 *
 * @return void
 */
protected function init_namelookup() {
    if ($this->emptyziphack) {
        $this->namelookup = array();
        return;
    }

    if (!isset($this->za) || isset($this->namelookup)) {
        return;
    }

    $this->namelookup = array();

    if ($this->mode != file_archive::OPEN) {
        // No need to tweak existing names when creating zip file because there are none yet!
        return;
    }

    if (!file_exists($this->archivepathname)) {
        return;
    }

    if (!$fp = fopen($this->archivepathname, 'rb')) {
        return;
    }

    // Take the size from the opened handle, filesize() may answer from PHP's stat cache.
    $stat = fstat($fp);
    $filesize = ($stat === false) ? 0 : $stat['size'];
    if (!$filesize) {
        // Close the handle here too, it used to be leaked on this path.
        fclose($fp);
        return;
    }

    $centralend = self::zip_get_central_end($fp, $filesize);

    if ($centralend === false || $centralend['disk'] !== 0 || $centralend['disk_start'] !== 0
            || $centralend['offset'] === 0xFFFFFFFF) {
        // Single disk archives only and no support for ZIP64, sorry.
        fclose($fp);
        return;
    }

    fseek($fp, $centralend['offset']);
    $data = ($centralend['size'] > 0) ? fread($fp, $centralend['size']) : '';
    if ($data === false) {
        fclose($fp);
        return;
    }

    $pos = 0;
    $files = array();
    for ($i = 0; $i < $centralend['entries']; $i++) {
        $file = self::zip_parse_file_header($data, $centralend, $pos);
        if ($file === false) {
            // Wrong header, sorry.
            fclose($fp);
            return;
        }
        $files[] = $file;
    }
    fclose($fp);

    foreach ($files as $file) {
        $name = $file['name'];
        if (preg_match('/^[a-zA-Z0-9_\-\.]*$/', $file['name'])) {
            // No need to fix ASCII.
            $name = fix_utf8($name);

        } else if (!($file['general'] & pow(2, 11))) {
            // The unicode flag (bit 11) is not set, so the encoding has to be worked out.
            // First look for unicode name alternatives.
            $found = false;
            foreach ($file['extra'] as $extra) {
                if ($extra['id'] !== 0x7075 || strlen($extra['data']) < 5) {
                    // Not a unicode path field, or too short to hold version + crc.
                    continue;
                }
                $unicodepath = unpack('cversion/Vcrc', substr($extra['data'], 0, 5));
                if ($unicodepath['crc'] === crc32($name)) {
                    $found = true;
                    $name = (string)substr($extra['data'], 5);
                }
            }

            if (!$found && !empty($this->encoding) && $this->encoding !== 'utf-8') {
                // Try the encoding from open(); accept it only if it converts back unchanged.
                $newname = @core_text::convert($name, $this->encoding, 'utf-8');
                $original = core_text::convert($newname, 'utf-8', $this->encoding);
                if ($original === $name) {
                    $found = true;
                    $name = $newname;
                }
            }

            if (!$found && $file['version'] === 0x315) {
                // This looks like OS X build in zipper.
                $newname = fix_utf8($name);
                if ($newname === $name) {
                    $found = true;
                    $name = $newname;
                }
            }

            if (!$found && $file['version'] === 0) {
                // This looks like our old borked Moodle 2.2 file.
                $newname = fix_utf8($name);
                if ($newname === $name) {
                    $found = true;
                    $name = $newname;
                }
            }

            if (!$found && ($encoding = get_string('oldcharset', 'langconfig'))) {
                // Last attempt - try the dos/unix encoding from current language.
                $windows = true;
                foreach ($file['extra'] as $extra) {
                    // In Windows archivers do not usually set any extras with the exception of NTFS flag in WinZip/WinRar.
                    $windows = false;
                    if ($extra['id'] === 0x000a) {
                        $windows = true;
                        break;
                    }
                }

                if ($windows === true) {
                    switch (strtoupper($encoding)) {
                        case 'ISO-8859-1':
                            $encoding = 'CP850';
                            break;
                        case 'ISO-8859-2':
                            $encoding = 'CP852';
                            break;
                        case 'ISO-8859-4':
                            $encoding = 'CP775';
                            break;
                        case 'ISO-8859-5':
                            $encoding = 'CP866';
                            break;
                        case 'ISO-8859-6':
                            $encoding = 'CP720';
                            break;
                        case 'ISO-8859-7':
                            $encoding = 'CP737';
                            break;
                        case 'ISO-8859-8':
                            $encoding = 'CP862';
                            break;
                        case 'WINDOWS-1251':
                            $encoding = 'CP866';
                            break;
                        case 'EUC-JP':
                        case 'UTF-8':
                            if ($winchar = get_string('localewincharset', 'langconfig')) {
                                // Most probably works only for zh_cn,
                                // if there are more problems we could add zipcharset to langconfig files.
                                $encoding = $winchar;
                            }
                            break;
                    }
                }
                $newname = @core_text::convert($name, $encoding, 'utf-8');
                $original = core_text::convert($newname, 'utf-8', $encoding);

                if ($original === $name) {
                    $name = $newname;
                }
            }
        }

        $name = str_replace('\\', '/', $name);  // No MS \ separators.
        $name = clean_param($name, PARAM_PATH); // Only safe chars.
        $name = ltrim($name, '/');              // No leading slash.

        if (function_exists('normalizer_normalize')) {
            // normalizer_normalize() returns false on failure, keep the name we have then.
            $normalised = normalizer_normalize($name, Normalizer::FORM_C);
            if ($normalised !== false) {
                $name = $normalised;
            }
        }

        $this->namelookup[$file['name']] = $name;
    }
}
/**
 * Add unicode flag to all files in archive.
 *
 * Bit 11 of the general purpose flags is set in both the central and the
 * local header of every entry whose name is non-ASCII, valid utf-8 and has
 * no extra fields. All other entries are left untouched.
 *
 * NOTE: single disk archives only, no ZIP64 support.
 *
 * @return bool success, modifies the file contents
 */
protected function fix_utf8_flags() {
    if ($this->emptyziphack) {
        return true;
    }

    if (!file_exists($this->archivepathname)) {
        return true;
    }

    // Note: the ZIP structure is described at http://www.pkware.com/documents/casestudies/APPNOTE.TXT
    if (!$fp = fopen($this->archivepathname, 'rb+')) {
        return false;
    }

    // Take the size from the opened handle, filesize() may answer from PHP's stat cache.
    $stat = fstat($fp);
    $filesize = ($stat === false) ? 0 : $stat['size'];
    if (!$filesize) {
        // Close the handle here too, it used to be leaked on this path.
        fclose($fp);
        return false;
    }

    $centralend = self::zip_get_central_end($fp, $filesize);

    if ($centralend === false || $centralend['disk'] !== 0 || $centralend['disk_start'] !== 0
            || $centralend['offset'] === 0xFFFFFFFF) {
        // Single disk archives only and no support for ZIP64, sorry.
        fclose($fp);
        return false;
    }

    fseek($fp, $centralend['offset']);
    $data = ($centralend['size'] > 0) ? fread($fp, $centralend['size']) : '';
    if ($data === false) {
        fclose($fp);
        return false;
    }

    $utf8flag = pow(2, 11);
    $pos = 0;
    $files = array();
    for ($i = 0; $i < $centralend['entries']; $i++) {
        $file = self::zip_parse_file_header($data, $centralend, $pos);
        if ($file === false) {
            // Wrong header, sorry.
            fclose($fp);
            return false;
        }

        if (($file['general'] | $utf8flag) === $file['general']) {
            // Flag already set, nothing to do with this file.
            continue;
        }
        if (preg_match('/^[a-zA-Z0-9_\-\.]*$/', $file['name'])) {
            // ASCII file names are always ok.
            continue;
        }
        if ($file['extra']) {
            // Most probably not created by php zip ext, better to skip it.
            continue;
        }
        if (fix_utf8($file['name']) !== $file['name']) {
            // Does not look like a valid utf-8 encoded file name, skip it.
            continue;
        }

        // Read local file header.
        fseek($fp, $file['local_offset']);
        $rawlocal = fread($fp, 30);
        if ($rawlocal === false || strlen($rawlocal) < 30) {
            // Truncated local header - borked file!
            fclose($fp);
            return false;
        }
        $localfile = unpack('Vsig/vversion_req/vgeneral/vmethod/vmtime/vmdate/Vcrc/Vsize_compressed/Vsize/vname_length/vextra_length', $rawlocal);
        if ($localfile['sig'] !== 0x04034b50) {
            // Borked file!
            fclose($fp);
            return false;
        }

        $file['local'] = $localfile;
        $files[] = $file;
    }

    // Nothing is written until every header has been validated above.
    foreach ($files as $file) {
        $localfile = $file['local'];

        // Add the unicode flag in central file header (flags are 8 bytes into it).
        fseek($fp, $file['central_offset'] + 8);
        if (ftell($fp) === $file['central_offset'] + 8) {
            fwrite($fp, pack('v', $file['general'] | $utf8flag));
        }

        // Modify local file header too (flags are 6 bytes into it).
        fseek($fp, $file['local_offset'] + 6);
        if (ftell($fp) === $file['local_offset'] + 6) {
            fwrite($fp, pack('v', $localfile['general'] | $utf8flag));
        }
    }

    fclose($fp);
    return true;
}
/**
 * Read end of central signature of ZIP file.
 *
 * The record is 22 bytes plus an optional comment of up to 65535 bytes, so
 * it must start within the last 65557 bytes of the file. The tail of the
 * file is searched for the record signature. A record whose comment ends
 * exactly at the end of the file is preferred; otherwise the last complete
 * record found is returned.
 *
 * @internal
 * @static
 * @param resource $fp open handle of the zip file
 * @param int $filesize size of the zip file in bytes
 * @return array|bool record fields (sig, disk, disk_start, disk_entries, entries,
 *      size, offset, comment_length, comment) or false when no record is found
 */
public static function zip_get_central_end($fp, $filesize) {
    $recordsize = 22;
    $signature = pack('V', 0x06054b50);

    if ($filesize < $recordsize) {
        // Too small to hold the record at all.
        return false;
    }

    // Never seek before the start of the file: small archives are read whole.
    $tailsize = min($filesize, 65557);
    if (fseek($fp, $filesize - $tailsize) !== 0) {
        return false;
    }
    $data = fread($fp, $tailsize);
    if ($data === false) {
        return false;
    }
    $length = strlen($data);

    // Collect every position where a complete record could start.
    $candidates = array();
    $offset = 0;
    while ($offset <= $length - $recordsize) {
        $found = strpos($data, $signature, $offset);
        if ($found === false || $found > $length - $recordsize) {
            break;
        }
        $candidates[] = $found;
        $offset = $found + 1;
    }

    // Check the candidates from the end of the file backwards.
    $fallback = false;
    foreach (array_reverse($candidates) as $pos) {
        $centralend = unpack('Vsig/vdisk/vdisk_start/vdisk_entries/ventries/Vsize/Voffset/vcomment_length',
                substr($data, $pos, $recordsize));
        if ($centralend['comment_length']) {
            // The comment follows the record, wherever the record was found.
            $centralend['comment'] = (string)substr($data, $pos + $recordsize, $centralend['comment_length']);
        } else {
            $centralend['comment'] = '';
        }

        if ($pos + $recordsize + $centralend['comment_length'] === $length) {
            // Record and comment end exactly at the end of the file.
            return $centralend;
        }
        if ($fallback === false) {
            $fallback = $centralend;
        }
    }

    // Borked ZIP structure when nothing was found at all.
    return $fallback;
}
/**
 * Parse file header.
 *
 * Reads one central directory file header from $data at $pos and moves
 * $pos behind it (fixed 46 byte part, name, extra fields and comment).
 *
 * @internal
 * @param string $data raw central directory data
 * @param array $centralend end of central directory record, its 'offset' is used
 * @param int $pos (modified) position of the header in $data
 * @return array|bool file info, false when the header is truncated or has a wrong signature
 */
public static function zip_parse_file_header($data, $centralend, &$pos) {
    $fixed = substr($data, $pos, 46);
    if ($fixed === false || strlen($fixed) < 46) {
        // Truncated data, there is no complete header here.
        return false;
    }

    $file = unpack('Vsig/vversion/vversion_req/vgeneral/vmethod/Vmodified/Vcrc/Vsize_compressed/Vsize/vname_length/vextra_length/vcomment_length/vdisk/vattr/Vattrext/Vlocal_offset', $fixed);
    $file['central_offset'] = $centralend['offset'] + $pos;
    $pos = $pos + 46;
    if ($file['sig'] !== 0x02014b50) {
        // Borked ZIP structure!
        return false;
    }

    $file['name'] = (string)substr($data, $pos, $file['name_length']);
    $pos = $pos + $file['name_length'];

    $file['extra'] = array();
    $file['extra_data'] = '';
    if ($file['extra_length']) {
        $extradata = substr($data, $pos, $file['extra_length']);
        $file['extra_data'] = $extradata;
        // Each extra field is a 2 byte id and a 2 byte size followed by the data.
        while (strlen($extradata) > 4) {
            $extra = unpack('vid/vsize', substr($extradata, 0, 4));
            $extra['data'] = substr($extradata, 4, $extra['size']);
            $extradata = substr($extradata, 4 + $extra['size']);
            $file['extra'][] = $extra;
        }
        $pos = $pos + $file['extra_length'];
    }

    if ($file['comment_length']) {
        // Read the comment first, then step over it.
        $file['comment'] = (string)substr($data, $pos, $file['comment_length']);
        $pos = $pos + $file['comment_length'];
    } else {
        $file['comment'] = '';
    }

    return $file;
}