Release 2015-08-10 "Detritus"
[dokuwiki.git] / inc / Tar.class.php
blob57c280d7991d9646dd10835d1880067686da59e4
1 <?php
/**
 * This class allows the extraction of existing and the creation of new Unix TAR archives.
 * To keep things simple, the modification of existing archives is not supported. It handles
 * uncompressed, gzip and bzip2 compressed tar files.
 *
 * Long pathnames (>100 chars) are supported in POSIX ustar and GNU longlink formats.
 *
 * To list the contents of an existing TAR archive, open() it and use contents() on it:
 *
 *     $tar = new Tar();
 *     $tar->open('myfile.tgz');
 *     $toc = $tar->contents();
 *     print_r($toc);
 *
 * To extract the contents of an existing TAR archive, open() it and use extract() on it:
 *
 *     $tar = new Tar();
 *     $tar->open('myfile.tgz');
 *     $tar->extract('/tmp');
 *
 * To create a new TAR archive directly on the filesystem (low memory requirements), create() it,
 * add*() files and close() it:
 *
 *     $tar = new Tar();
 *     $tar->create('myfile.tgz');
 *     $tar->addFile(...);
 *     $tar->addData(...);
 *     ...
 *     $tar->close();
 *
 * To create a TAR archive directly in memory, create() it, add*() files and then either save()
 * or getArchive() it:
 *
 *     $tar = new Tar();
 *     $tar->create();
 *     $tar->addFile(...);
 *     $tar->addData(...);
 *     ...
 *     $tar->save('myfile.tgz'); // compresses and saves it
 *     echo $tar->getArchive(Tar::COMPRESS_GZIP); // compresses and returns it
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Bouchon <tarlib@bouchon.org> (Maxg)
 * @license GPL 2
 * @deprecated 2015-05-15 - use splitbrain\PHPArchive\Tar instead
 */
class Tar {

    const COMPRESS_AUTO = 0;
    const COMPRESS_NONE = 1;
    const COMPRESS_GZIP = 2;
    const COMPRESS_BZIP = 3;

    /** @var string path of the archive on disk, empty for in-memory archives */
    protected $file = '';
    /** @var int one of the Tar::COMPRESS_* constants */
    protected $comptype = Tar::COMPRESS_AUTO;
    /** @var resource|int */
    protected $fh;
    /** @var string the raw (uncompressed) archive when working in memory */
    protected $memory = '';
    /** @var bool true when no archive is currently open */
    protected $closed = true;
    /** @var bool true when the archive was opened through create() */
    protected $writeaccess = false;

    /**
     * Open an existing TAR file for reading
     *
     * @param string $file
     * @param int    $comptype one of the Tar::COMPRESS_* constants
     * @throws TarIOException
     * @throws TarIllegalCompressionException
     */
    public function open($file, $comptype = Tar::COMPRESS_AUTO) {
        // determine compression
        if($comptype == Tar::COMPRESS_AUTO) $comptype = $this->filetype($file);
        $this->compressioncheck($comptype);

        $this->comptype = $comptype;
        $this->file     = $file;
        // the object may have been used with create() before; without this reset
        // close() would try to write an archive footer to the read handle
        $this->writeaccess = false;

        if($this->comptype === Tar::COMPRESS_GZIP) {
            $this->fh = @gzopen($this->file, 'rb');
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            $this->fh = @bzopen($this->file, 'r');
        } else {
            $this->fh = @fopen($this->file, 'rb');
        }

        if(!$this->fh) throw new TarIOException('Could not open file for reading: '.$this->file);
        $this->closed = false;
    }

    /**
     * Read the contents of a TAR archive
     *
     * This function lists the files stored in the archive, and returns an indexed array of associative
     * arrays containing for each file the following information:
     *
     * checksum    Tar Checksum of the file
     * filename    The full name of the stored file
     * perm        UNIX permissions in DECIMAL, not octal
     * uid         The Owner ID
     * gid         The Group ID
     * size        Uncompressed filesize
     * mtime       Timestamp of last modification
     * typeflag    Empty (or '0') for files, set for folders
     * link        Is it a symlink?
     * uname       Owner name
     * gname       Group name
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @return array
     * @throws TarIOException
     */
    public function contents() {
        if($this->closed || !$this->file) throw new TarIOException('Can not read from a closed archive');

        $result = array();
        while($read = $this->readbytes(512)) {
            $header = $this->parseHeader($read);
            if(!is_array($header)) continue;

            // jump over the file data, which is padded to full 512 byte blocks
            $this->skipbytes(ceil($header['size'] / 512) * 512);
            $result[] = $header;
        }

        $this->close();
        return $result;
    }

    /**
     * Extract an existing TAR archive
     *
     * The $strip parameter allows you to strip a certain number of path components from the filenames
     * found in the tar file, similar to the --strip-components feature of GNU tar. This is triggered when
     * an integer is passed as $strip.
     * Alternatively a fixed string prefix may be passed in $strip. If the filename matches this prefix,
     * the prefix will be stripped. It is recommended to give prefixes with a trailing slash.
     *
     * By default this will extract all files found in the archive. You can restrict the output using the $include
     * and $exclude parameter. Both expect a full regular expression (including delimiters and modifiers). If
     * $include is set only files that match this expression will be extracted. Files that match the $exclude
     * expression will never be extracted. Both parameters can be used in combination. Expressions are matched against
     * stripped filenames as described above.
     *
     * The archive is closed after reading the contents, because rewinding is not possible in bzip2 streams.
     * Reopen the file with open() again if you want to do additional operations
     *
     * @param string     $outdir  the target directory for extracting
     * @param int|string $strip   either the number of path components or a fixed prefix to strip
     * @param string     $exclude a regular expression of files to exclude
     * @param string     $include a regular expression of files to include
     * @throws TarIOException
     * @return array the headers of all extracted entries
     */
    public function extract($outdir, $strip = '', $exclude = '', $include = '') {
        if($this->closed || !$this->file) throw new TarIOException('Can not read from a closed archive');

        $outdir = rtrim($outdir, '/');
        io_mkdir_p($outdir);
        $striplen = strlen($strip);

        $extracted = array();

        while($dat = $this->readbytes(512)) {
            // read the file header
            $header = $this->parseHeader($dat);
            if(!is_array($header)) continue;
            if(!$header['filename']) continue;

            // strip prefix; cleanPath() also removes '..' parts and leading slashes
            $filename = $this->cleanPath($header['filename']);
            if(is_int($strip)) {
                // if $strip is an integer we strip this many path components
                $parts = explode('/', $filename);
                if(!$header['typeflag']) {
                    $base = array_pop($parts); // keep filename itself
                } else {
                    $base = '';
                }
                $filename = join('/', array_slice($parts, $strip));
                if($base) $filename .= "/$base";
            } else {
                // if $strip is a string, we strip a prefix here
                if(substr($filename, 0, $striplen) == $strip) $filename = substr($filename, $striplen);
            }

            // check if this should be extracted
            $extract = true;
            if(!$filename) {
                $extract = false;
            } else {
                if($include) {
                    if(preg_match($include, $filename)) {
                        $extract = true;
                    } else {
                        $extract = false;
                    }
                }
                if($exclude && preg_match($exclude, $filename)) {
                    $extract = false;
                }
            }

            // Now do the extraction (or not)
            if($extract) {
                $extracted[] = $header;

                $output    = "$outdir/$filename";
                $directory = ($header['typeflag']) ? $output : dirname($output);
                io_mkdir_p($directory);

                // is this a file?
                if(!$header['typeflag']) {
                    $fp = fopen($output, "wb");
                    if(!$fp) throw new TarIOException('Could not open file for writing: '.$output);

                    // copy all full blocks, then the used part of the last (padded) block
                    $size = floor($header['size'] / 512);
                    for($i = 0; $i < $size; $i++) {
                        fwrite($fp, $this->readbytes(512), 512);
                    }
                    if(($header['size'] % 512) != 0) fwrite($fp, $this->readbytes(512), $header['size'] % 512);

                    fclose($fp);
                    touch($output, $header['mtime']);
                    chmod($output, $header['perm']);
                } else {
                    $this->skipbytes(ceil($header['size'] / 512) * 512); // the size is usually 0 for directories
                }
            } else {
                $this->skipbytes(ceil($header['size'] / 512) * 512);
            }
        }

        $this->close();
        return $extracted;
    }

    /**
     * Create a new TAR file
     *
     * If $file is empty, the tar file will be created in memory
     *
     * @param string $file
     * @param int    $comptype  one of the Tar::COMPRESS_* constants
     * @param int    $complevel gzip compression level, only used for gzip files
     * @throws TarIOException
     * @throws TarIllegalCompressionException
     */
    public function create($file = '', $comptype = Tar::COMPRESS_AUTO, $complevel = 9) {
        // determine compression
        if($comptype == Tar::COMPRESS_AUTO) $comptype = $this->filetype($file);
        $this->compressioncheck($comptype);

        $this->comptype = $comptype;
        $this->file     = $file;
        $this->memory   = '';
        $this->fh       = 0;

        if($this->file) {
            if($this->comptype === Tar::COMPRESS_GZIP) {
                $this->fh = @gzopen($this->file, 'wb'.$complevel);
            } elseif($this->comptype === Tar::COMPRESS_BZIP) {
                $this->fh = @bzopen($this->file, 'w');
            } else {
                $this->fh = @fopen($this->file, 'wb');
            }

            if(!$this->fh) throw new TarIOException('Could not open file for writing: '.$this->file);
        }
        $this->writeaccess = true;
        $this->closed      = false;
    }

    /**
     * Add a file to the current TAR archive using an existing file in the filesystem
     *
     * @todo handle directory adding
     *
     * @param string $file the original file
     * @param string $name the name to use for the file in the archive
     * @throws TarIOException
     */
    public function addFile($file, $name = '') {
        if($this->closed) throw new TarIOException('Archive has been closed, files can no longer be added');

        if(!$name) $name = $file;
        $name = $this->cleanPath($name);

        $fp = fopen($file, 'rb');
        if(!$fp) throw new TarIOException('Could not open file for reading: '.$file);

        try {
            // create file header and copy all stat info from the original file
            clearstatcache(false, $file);
            $stat = stat($file);
            $this->writeFileHeader(
                $name,
                $stat[4],
                $stat[5],
                fileperms($file),
                filesize($file),
                filemtime($file)
            );

            while(!feof($fp)) {
                $data = fread($fp, 512);
                if($data === false) break;
                if($data === '') break;
                // every data block is NUL padded to 512 bytes
                $packed = pack("a512", $data);
                $this->writebytes($packed);
            }
        } catch(Exception $e) {
            // do not leak the source handle when writing to the archive fails
            fclose($fp);
            throw $e;
        }
        fclose($fp);
    }

    /**
     * Add a file to the current TAR archive using the given $data as content
     *
     * @param string $name
     * @param string $data
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $mtime defaults to the current time when 0
     * @throws TarIOException
     */
    public function addData($name, $data, $uid = 0, $gid = 0, $perm = 0666, $mtime = 0) {
        if($this->closed) throw new TarIOException('Archive has been closed, files can no longer be added');

        $name = $this->cleanPath($name);
        $len  = strlen($data);

        $this->writeFileHeader(
            $name,
            $uid,
            $gid,
            $perm,
            $len,
            ($mtime) ? $mtime : time()
        );

        for($s = 0; $s < $len; $s += 512) {
            $this->writebytes(pack("a512", substr($data, $s, 512)));
        }
    }

    /**
     * Add the closing footer to the archive if in write mode, close all file handles
     *
     * After a call to this function no more data can be added to the archive, for
     * read access no reading is allowed anymore
     *
     * "Physically, an archive consists of a series of file entries terminated by an end-of-archive entry, which
     * consists of two 512 blocks of zero bytes"
     *
     * @link http://www.gnu.org/software/tar/manual/html_chapter/tar_8.html#SEC134
     * @throws TarIOException
     */
    public function close() {
        if($this->closed) return; // we did this already

        // write footer
        if($this->writeaccess) {
            $this->writebytes(pack("a512", ""));
            $this->writebytes(pack("a512", ""));
        }

        // close file handles
        if($this->file) {
            if($this->comptype === Tar::COMPRESS_GZIP) {
                gzclose($this->fh);
            } elseif($this->comptype === Tar::COMPRESS_BZIP) {
                bzclose($this->fh);
            } else {
                fclose($this->fh);
            }

            $this->file = '';
            $this->fh   = 0;
        }

        $this->closed = true;
    }

    /**
     * Returns the created in-memory archive data
     *
     * This implicitly calls close() on the Archive
     *
     * @param int $comptype  one of the Tar::COMPRESS_* constants
     * @param int $complevel gzip compression level, only used for gzip output
     * @return mixed|string
     * @throws TarIllegalCompressionException
     */
    public function getArchive($comptype = Tar::COMPRESS_AUTO, $complevel = 9) {
        $this->close();

        if($comptype === Tar::COMPRESS_AUTO) $comptype = $this->comptype;
        $this->compressioncheck($comptype);

        // gzencode() produces the gzip container that gzopen() and tar expect;
        // gzcompress() would emit a bare zlib stream instead
        if($comptype === Tar::COMPRESS_GZIP) return gzencode($this->memory, $complevel);
        if($comptype === Tar::COMPRESS_BZIP) return bzcompress($this->memory);
        return $this->memory;
    }

    /**
     * Save the created in-memory archive data
     *
     * Note: It is more memory effective to specify the filename in the create() function and
     * let the library work on the new file directly.
     *
     * @param string $file
     * @param int    $comptype
     * @param int    $complevel
     * @throws TarIOException
     */
    public function save($file, $comptype = Tar::COMPRESS_AUTO, $complevel = 9) {
        if($comptype === Tar::COMPRESS_AUTO) $comptype = $this->filetype($file);

        if(!file_put_contents($file, $this->getArchive($comptype, $complevel))) {
            throw new TarIOException('Could not write to file: '.$file);
        }
    }

    /**
     * Read from the open file pointer
     *
     * @param int $length bytes to read
     * @return string
     */
    protected function readbytes($length) {
        if($this->comptype === Tar::COMPRESS_GZIP) {
            return @gzread($this->fh, $length);
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            return @bzread($this->fh, $length);
        } else {
            return @fread($this->fh, $length);
        }
    }

    /**
     * Write to the open filepointer or memory
     *
     * @param string $data
     * @throws TarIOException
     * @return int number of bytes written
     */
    protected function writebytes($data) {
        if(!$this->file) {
            $this->memory .= $data;
            $written = strlen($data);
        } elseif($this->comptype === Tar::COMPRESS_GZIP) {
            $written = @gzwrite($this->fh, $data);
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            $written = @bzwrite($this->fh, $data);
        } else {
            $written = @fwrite($this->fh, $data);
        }
        if($written === false) throw new TarIOException('Failed to write to archive stream');
        return $written;
    }

    /**
     * Skip forward in the open file pointer
     *
     * This is basically a wrapper around seek() (and a workaround for bzip2)
     *
     * @param int $bytes number of bytes to skip, relative to the current position
     */
    public function skipbytes($bytes) {
        $bytes = (int) $bytes; // callers pass the float result of ceil()

        if($this->comptype === Tar::COMPRESS_GZIP) {
            @gzseek($this->fh, $bytes, SEEK_CUR);
        } elseif($this->comptype === Tar::COMPRESS_BZIP) {
            // there is no seek in bzip2, we simply read on
            // bzread() returns at most 8192 bytes per call, so larger skips need several reads
            while($bytes > 0) {
                $toread = min(8192, $bytes);
                @bzread($this->fh, $toread);
                $bytes -= $toread;
            }
        } else {
            @fseek($this->fh, $bytes, SEEK_CUR);
        }
    }

    /**
     * Write a file header
     *
     * Names longer than 100 bytes are stored using the POSIX ustar prefix field when
     * they can be split into directory and file name, otherwise a GNU longlink entry
     * carrying the full name is written in front of the (truncated) header.
     *
     * @param string $name
     * @param int    $uid
     * @param int    $gid
     * @param int    $perm
     * @param int    $size
     * @param int    $mtime
     * @param string $typeflag Set to '5' for directories
     * @throws TarIOException
     */
    protected function writeFileHeader($name, $uid, $gid, $perm, $size, $mtime, $typeflag = '') {
        // handle filename length restrictions
        $prefix  = '';
        $namelen = strlen($name);
        if($namelen > 100) {
            $file = basename($name);
            $dir  = dirname($name);
            if(strlen($file) > 100 || strlen($dir) > 155) {
                // we're still too large, let's use GNU longlink
                $this->writeFileHeader('././@LongLink', 0, 0, 0, $namelen, 0, 'L');
                for($s = 0; $s < $namelen; $s += 512) {
                    $this->writebytes(pack("a512", substr($name, $s, 512)));
                }
                $name = substr($name, 0, 100); // cut off name
            } else {
                // we're fine when splitting, use POSIX ustar
                $prefix = $dir;
                $name   = $file;
            }
        }

        // values are needed in octal
        $uid   = sprintf("%6s ", decoct($uid));
        $gid   = sprintf("%6s ", decoct($gid));
        $perm  = sprintf("%6s ", decoct($perm));
        $size  = sprintf("%11s ", decoct($size));
        $mtime = sprintf("%11s", decoct($mtime));

        // the header is built in two parts, the 8 checksum bytes go in between
        $data_first = pack("a100a8a8a8a12A12", $name, $perm, $uid, $gid, $size, $mtime);
        $data_last  = pack("a1a100a6a2a32a32a8a8a155a12", $typeflag, '', 'ustar', '', '', '', '', '', $prefix, "");

        for($i = 0, $chks = 0; $i < 148; $i++)
            $chks += ord($data_first[$i]);

        // the checksum field itself counts as eight spaces (8 * 32 = 256)
        for($i = 156, $chks += 256, $j = 0; $i < 512; $i++, $j++)
            $chks += ord($data_last[$j]);

        $this->writebytes($data_first);

        $chks = pack("a8", sprintf("%6s ", decoct($chks)));
        $this->writebytes($chks.$data_last);
    }

    /**
     * Decode the given tar file header
     *
     * For GNU longlink entries the following name block(s) and the real header are
     * read from the stream as well, and the real header is returned with the long name.
     *
     * @param string $block a 512 byte block containing the header data
     * @return false|array false when the block is not a valid header
     */
    protected function parseHeader($block) {
        if(!$block || strlen($block) != 512) return false;

        for($i = 0, $chks = 0; $i < 148; $i++)
            $chks += ord($block[$i]);

        // the checksum field itself counts as eight spaces (8 * 32 = 256)
        for($i = 156, $chks += 256; $i < 512; $i++)
            $chks += ord($block[$i]);

        $header = @unpack("a100filename/a8perm/a8uid/a8gid/a12size/a12mtime/a8checksum/a1typeflag/a100link/a6magic/a2version/a32uname/a32gname/a8devmajor/a8devminor/a155prefix", $block);
        if(!$header) return false;

        $return             = array();
        $return['checksum'] = OctDec(trim($header['checksum']));
        if($return['checksum'] != $chks) return false;

        $return['filename'] = trim($header['filename']);
        $return['perm']     = OctDec(trim($header['perm']));
        $return['uid']      = OctDec(trim($header['uid']));
        $return['gid']      = OctDec(trim($header['gid']));
        $return['size']     = OctDec(trim($header['size']));
        $return['mtime']    = OctDec(trim($header['mtime']));
        // unpack() keeps NUL bytes for the "a" code since PHP 5.5; a NUL typeflag marks
        // a regular file and has to become an empty string to be falsy for callers
        $return['typeflag'] = rtrim($header['typeflag'], "\0");
        $return['link']     = trim($header['link']);
        $return['uname']    = trim($header['uname']);
        $return['gname']    = trim($header['gname']);

        // Handle ustar Posix compliant path prefixes
        if(trim($header['prefix'])) $return['filename'] = trim($header['prefix']).'/'.$return['filename'];

        // Handle Long-Link entries from GNU Tar
        if($return['typeflag'] == 'L') {
            // following data block(s) is the filename; use the decoded size, the raw
            // header field is an octal string
            $namebytes = (int) ceil($return['size'] / 512) * 512;
            $filename  = ($namebytes > 0) ? trim($this->readbytes($namebytes)) : '';
            // next block is the real header
            $block  = $this->readbytes(512);
            $return = $this->parseHeader($block);
            if(!is_array($return)) return false;
            // overwrite the filename
            $return['filename'] = $filename;
        }

        return $return;
    }

    /**
     * Cleans up a path and removes relative parts, also strips leading slashes
     *
     * @param string $path
     * @return string
     */
    public function cleanPath($path) {
        $path    = explode('/', $path);
        $newpath = array();
        foreach($path as $p) {
            if($p === '' || $p === '.') continue;
            if($p === '..') {
                array_pop($newpath);
                continue;
            }
            array_push($newpath, $p);
        }
        return trim(implode('/', $newpath), '/');
    }

    /**
     * Checks if the given compression type is available and throws an exception if not
     *
     * @param int $comptype
     * @throws TarIllegalCompressionException
     */
    protected function compressioncheck($comptype) {
        if($comptype === Tar::COMPRESS_GZIP && !function_exists('gzopen')) {
            throw new TarIllegalCompressionException('No gzip support available');
        }

        if($comptype === Tar::COMPRESS_BZIP && !function_exists('bzopen')) {
            throw new TarIllegalCompressionException('No bzip2 support available');
        }
    }

    /**
     * Guesses the wanted compression from the given filename extension
     *
     * You don't need to call this yourself. It's used when you pass Tar::COMPRESS_AUTO somewhere
     *
     * @param string $file
     * @return int
     */
    public function filetype($file) {
        $file = strtolower($file);
        if(substr($file, -3) == '.gz' || substr($file, -4) == '.tgz') {
            $comptype = Tar::COMPRESS_GZIP;
        } elseif(substr($file, -4) == '.bz2' || substr($file, -4) == '.tbz') {
            $comptype = Tar::COMPRESS_BZIP;
        } else {
            $comptype = Tar::COMPRESS_NONE;
        }
        return $comptype;
    }
}
/**
 * Class TarIOException
 *
 * Thrown by Tar when an archive or a file can not be opened, read or written
 */
class TarIOException extends Exception
{
}
/**
 * Class TarIllegalCompressionException
 *
 * Thrown by Tar when the requested compression is not supported by this PHP installation
 */
class TarIllegalCompressionException extends Exception
{
}