<?php
/* 7 December 2006. version 1.0
 *
 * This is the PHP version of Dean Edwards' JavaScript Packer,
 * based on:
 *
 * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards
 * a multi-pattern parser.
 * KNOWN BUG: erroneous behavior when using escapeChar with a replacement
 * value that is a function
 *
 * packer, version 2.0.2 (2005-08-19) Copyright 2004-2005, Dean Edwards
 *
 * License: http://creativecommons.org/licenses/LGPL/2.1/
 *
 * Ported to PHP by Nicolas Martin.
 *
 * ----------------------------------------------------------------------
 *
 * Examples of usage:
 * $myPacker = new JavaScriptPacker($script, 62, true, false);
 * $packed = $myPacker->pack();
 *
 * or
 *
 * $myPacker = new JavaScriptPacker($script, 'Normal', true, false);
 * $packed = $myPacker->pack();
 *
 * or (default values)
 *
 * $myPacker = new JavaScriptPacker($script);
 * $packed = $myPacker->pack();
 *
 *
 * Parameters of the constructor:
 * $script:       the JavaScript to pack, string.
 * $encoding:     level of encoding, int or string:
 *                0, 10, 62, 95 or 'None', 'Numeric', 'Normal', 'High ASCII'.
 *                default: 62.
 * $fastDecode:   include the fast decoder in the packed result, boolean.
 *                default: true.
 * $specialChars: set it if you have flagged your private and local variables
 *                in the script, boolean.
 *                default: false.
 *
 * The pack() method returns the compressed JavaScript, as a string.
 *
 * See http://dean.edwards.name/packer/usage/ for more information.
 *
 * Notes:
 * # Needs PHP 5. Tested with PHP 5.1.2.
 *
 * # The packed result may be different from the one produced by Dean
 *   Edwards' version, but it has the same length. The reason is that the
 *   PHP function usort does not necessarily preserve the original order
 *   of two equal members. The JavaScript sort function does in fact
 *   preserve this order (but that is not required by the ECMAScript
 *   standard). So the order of the encoded keywords can be different
 *   in the two results.
 *
 * # Be careful with the 'High ASCII' encoding level if you use
 *   UTF-8 in your files...
 */
/**
 * Compresses a JavaScript source string.
 *
 * pack() runs up to three routines over the script, in this order:
 *  1. _basicCompression   - always; strips comments and white-space;
 *  2. _encodeSpecialChars - only when $specialChars is true;
 *  3. _encodeKeywords     - only when the encoding level is not 0; replaces
 *                           words by short codes and wraps the result in an
 *                           eval()-based unpacker.
 *
 * All pattern matching is delegated to the ParseMaster class.
 */
class JavaScriptPacker
{
    /** Replacement value telling ParseMaster to emit the matched text unchanged. */
    const IGNORE = '$1';

    // validated constructor parameters
    private $_script = '';
    private $_encoding = 62;
    private $_fastDecode = true;
    private $_specialChars = false;

    /** Textual encoding levels accepted by the constructor, mapped to their numeric level. */
    private $LITERAL_ENCODING = array(
        'None' => 0,
        'Numeric' => 10,
        'Normal' => 62,
        'High ASCII' => 95
    );

    /** Names of the parsing routines queued by pack(); executed in order by _pack(). */
    private $_parsers = array();

    /** word => number of occurrences; filled by _analyze(), read by _sortWords(). */
    private $_count = array();

    /** Scratch value handed over to the preg_replace_callback() callbacks of _bootStrap(). */
    private $buffer;

    /**
     * @param string     $_script       the JavaScript to pack
     * @param int|string $_encoding     0, 10, 62, 95 or 'None', 'Numeric', 'Normal', 'High ASCII'
     * @param bool       $_fastDecode   include the fast decoder in the packed result
     * @param bool       $_specialChars the script flags its private and local variables
     */
    public function __construct($_script, $_encoding = 62, $_fastDecode = true, $_specialChars = false)
    {
        // a line break is appended: the "//" and ";;;" patterns used by
        // _basicCompression() only match when a line break follows
        $this->_script = $_script . "\n";
        if (is_string($_encoding) && isset($this->LITERAL_ENCODING[$_encoding])) {
            $_encoding = $this->LITERAL_ENCODING[$_encoding];
        }
        // anything above 95 is treated as 95
        $this->_encoding = min((int)$_encoding, 95);
        $this->_fastDecode = $_fastDecode;
        $this->_specialChars = $_specialChars;
    }

    /**
     * Packs the script given to the constructor.
     *
     * @return string the compressed JavaScript
     */
    public function pack()
    {
        // start from an empty queue, otherwise a second call to pack()
        // would run every routine twice
        $this->_parsers = array();

        $this->_addParser('_basicCompression');
        if ($this->_specialChars) {
            $this->_addParser('_encodeSpecialChars');
        }
        if ($this->_encoding) {
            $this->_addParser('_encodeKeywords');
        }

        // go!
        return $this->_pack($this->_script);
    }

    // apply all parsing routines
    private function _pack($script)
    {
        foreach ($this->_parsers as $parser) {
            $script = call_user_func(array($this, $parser), $script);
        }
        return $script;
    }

    // keep a list of parsing functions, they'll be executed all at once
    private function _addParser($parser)
    {
        $this->_parsers[] = $parser;
    }

    // zero encoding - just removal of white space and comments
    private function _basicCompression($script)
    {
        $parser = new ParseMaster();
        // make safe
        $parser->escapeChar = '\\';
        // protect strings
        $parser->add('/\'[^\'\\n\\r]*\'/', self::IGNORE);
        $parser->add('/"[^"\\n\\r]*"/', self::IGNORE);
        // remove comments
        $parser->add('/\\/\\/[^\\n\\r]*[\\n\\r]/', ' ');
        $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' ');
        // protect regular expressions
        $parser->add('/\\s+(\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?)/', '$2'); // IGNORE
        $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\n\\r\\*][^\\/\\n\\r]*\\/g?i?/', self::IGNORE);
        // remove: ;;; doSomething();
        if ($this->_specialChars) {
            $parser->add('/;;;[^\\n\\r]+[\\n\\r]/');
        }
        // remove redundant semi-colons
        $parser->add('/\\(;;\\)/', self::IGNORE); // protect for (;;) loops
        $parser->add('/;+\\s*([};])/', '$2');
        // apply the above
        $script = $parser->exec($script);

        // remove white-space (the same parser is re-used on purpose: the
        // patterns registered above stay active during the second pass)
        $parser->add('/(\\b|\\x24)\\s+(\\b|\\x24)/', '$2 $3');
        $parser->add('/([+\\-])\\s+([+\\-])/', '$2 $3');
        $parser->add('/\\s+/', '');
        // done
        return $parser->exec($script);
    }

    private function _encodeSpecialChars($script)
    {
        $parser = new ParseMaster();
        // replace: $name -> n, $$name -> na
        $parser->add(
            '/((\\x24+)([a-zA-Z$_]+))(\\d*)/',
            array('fn' => '_replace_name')
        );
        // replace: _name -> _0, double-underscore (__name) is ignored
        $regexp = '/\\b_[A-Za-z\\d]\\w*/';
        // build the word list
        $keywords = $this->_analyze($script, $regexp, '_encodePrivate');
        // quick ref
        $encoded = $keywords['encoded'];

        $parser->add(
            $regexp,
            array(
                'fn' => '_replace_encoded',
                'data' => $encoded
            )
        );
        return $parser->exec($script);
    }

    private function _encodeKeywords($script)
    {
        // escape high-ascii values already in the script (i.e. in strings)
        if ($this->_encoding > 62) {
            $script = $this->_escape95($script);
        }
        // create the parser
        $parser = new ParseMaster();
        $encode = $this->_getEncoder($this->_encoding);
        // for high-ascii, don't encode single character low-ascii
        $regexp = ($this->_encoding > 62) ? '/\\w\\w+/' : '/\\w+/';
        // build the word list
        $keywords = $this->_analyze($script, $regexp, $encode);
        $encoded = $keywords['encoded'];

        // encode
        $parser->add(
            $regexp,
            array(
                'fn' => '_replace_encoded',
                'data' => $encoded
            )
        );
        if (empty($script)) {
            return $script;
        }
        return $this->_bootStrap($parser->exec($script), $keywords);
    }

    /**
     * Builds the word list of a script.
     *
     * @param string $script the script to analyse
     * @param string $regexp pattern matching one word
     * @param string $encode name of the method turning a word index into its code
     * @return array 'sorted' (index => word), 'encoded' (word => code) and
     *               'protected' (index => true for words equal to a code)
     */
    private function _analyze($script, $regexp, $encode)
    {
        $_sorted = array();    // list of words sorted by frequency
        $_encoded = array();   // dictionary of word->encoding
        $_protected = array(); // instances of "protected" words

        // retrieve all words in the script
        $matches = array();
        preg_match_all($regexp, $script, $matches);
        $all = isset($matches[0]) ? $matches[0] : array();

        if (!empty($all)) {
            $unsorted = array();  // same list, not sorted
            $protected = array(); // "protected" words (dictionary of '$' . code -> index)
            $values = array();    // dictionary of index->encoding
            $this->_count = array(); // word->count

            // count the occurrences - used for sorting later.
            // The words are walked from the last one to the first one and are
            // stored with a '$' prefix, which is stripped again below.
            $j = 0;
            for ($i = count($all) - 1; $i >= 0; $i--) {
                $word = '$' . $all[$i];
                if (!isset($this->_count[$word])) {
                    $this->_count[$word] = 0;
                    $unsorted[$j] = $word;
                    // make a dictionary of all of the protected words in this script
                    //  these are words that might be mistaken for encoding
                    $values[$j] = call_user_func(array($this, $encode), $j);
                    $protected['$' . $values[$j]] = $j;
                    $j++;
                }
                // increment the word counter
                $this->_count[$word]++;
            }

            // prepare to sort the word list, first we must protect
            //  words that are also used as codes. we assign them a code
            //  equivalent to the word itself.
            // e.g. if "do" falls within our encoding range
            //      then we store keywords["do"] = "do";
            // this avoids problems when decoding
            $total = count($unsorted);
            for ($i = $total - 1; $i >= 0; $i--) {
                $word = $unsorted[$i];
                if (isset($protected[$word])) {
                    $_sorted[$protected[$word]] = substr($word, 1);
                    $_protected[$protected[$word]] = true;
                    // a zero count sends the protected word to the end of the sort
                    $this->_count[$word] = 0;
                }
            }

            // sort the words by frequency
            // Note: the javascript and php version of sort can be different :
            // in php manual, usort :
            // " If two members compare as equal,
            // their order in the sorted array is undefined."
            // so the final packed script is different of the Dean's javascript version
            // but equivalent.
            usort($unsorted, array($this, '_sortWords'));

            // because there are "protected" words in the list
            //  we must add the sorted words around them
            $j = 0;
            for ($i = 0; $i < $total; $i++) {
                if (!isset($_sorted[$i])) {
                    $_sorted[$i] = substr($unsorted[$j++], 1);
                }
                $_encoded[$_sorted[$i]] = $values[$i];
            }
        }

        return array(
            'sorted' => $_sorted,
            'encoded' => $_encoded,
            'protected' => $_protected
        );
    }

    // usort() callback: most frequent word first
    private function _sortWords($match1, $match2)
    {
        return $this->_count[$match2] - $this->_count[$match1];
    }

    // build the boot function used for loading and decoding
    private function _bootStrap($packed, $keywords)
    {
        $ENCODE = $this->_safeRegExp('$encode\\($count\\)');

        // $packed: the packed script
        $packed = "'" . $this->_escape($packed) . "'";

        // $ascii: base for encoding
        $ascii = min(count($keywords['sorted']), $this->_encoding);
        if ($ascii == 0) {
            $ascii = 1;
        }

        // $count: number of words contained in the script
        $count = count($keywords['sorted']);

        // $keywords: list of words contained in the script
        foreach ($keywords['protected'] as $i => $value) {
            $keywords['sorted'][$i] = '';
        }
        // convert from a string to an array
        ksort($keywords['sorted']);
        $keywords = "'" . implode('|', $keywords['sorted']) . "'.split('|')";

        $encode = ($this->_encoding > 62) ? '_encode95' : $this->_getEncoder($ascii);
        $encode = $this->_getJSFunction($encode);
        $encode = preg_replace('/_encoding/', '$ascii', $encode);
        $encode = preg_replace('/arguments\\.callee/', '$encode', $encode);
        $inline = '\\$count' . ($ascii > 10 ? '.toString(\\$ascii)' : '');

        // $decode: code snippet to speed up decoding
        $decode = '';
        if ($this->_fastDecode) {
            // create the decoder
            $decode = $this->_getJSFunction('_decodeBody');
            if ($this->_encoding > 62) {
                $decode = preg_replace('/\\\\w/', '[\\xa1-\\xff]', $decode);
            } elseif ($ascii < 36) {
                // perform the encoding inline for lower ascii values
                $decode = preg_replace($ENCODE, $inline, $decode);
            }
            // special case: when $count==0 there are no keywords. I want to keep
            //  the basic shape of the unpacking function so i'll frig the code...
            if ($count == 0) {
                $decode = preg_replace($this->_safeRegExp('($count)\\s*=\\s*1'), '$1=0', $decode, 1);
            }
        }

        // boot function
        $unpack = $this->_getJSFunction('_unpack');
        if ($this->_fastDecode) {
            // insert the decoder
            $this->buffer = $decode;
            $unpack = preg_replace_callback('/\\{/', array($this, '_insertFastDecode'), $unpack, 1);
        }
        $unpack = preg_replace('/"/', "'", $unpack);
        if ($this->_encoding > 62) { // high-ascii
            // get rid of the word-boundaries for regexp matches
            $unpack = preg_replace('/\'\\\\\\\\b\'\s*\\+|\\+\s*\'\\\\\\\\b\'/', '', $unpack);
        }
        if ($ascii > 36 || $this->_encoding > 62 || $this->_fastDecode) {
            // insert the encode function
            $this->buffer = $encode;
            $unpack = preg_replace_callback('/\\{/', array($this, '_insertFastEncode'), $unpack, 1);
        } else {
            // perform the encoding inline
            $unpack = preg_replace($ENCODE, $inline, $unpack);
        }
        // pack the boot function too
        $unpackPacker = new JavaScriptPacker($unpack, 0, false, true);
        $unpack = $unpackPacker->pack();

        // arguments
        $params = array($packed, $ascii, $count, $keywords);
        if ($this->_fastDecode) {
            // initial values of the $encode and $decode parameters of the boot function
            $params[] = 0;
            $params[] = '{}';
        }
        $params = implode(',', $params);

        // the whole thing
        return 'eval(' . $unpack . '(' . $params . "))\n";
    }

    // preg_replace_callback() callback: puts the decoder right after the first "{"
    private function _insertFastDecode($match)
    {
        return '{' . $this->buffer . ';';
    }

    // preg_replace_callback() callback: puts the encode function right after the first "{"
    private function _insertFastEncode($match)
    {
        return '{$encode=' . $this->buffer . ';';
    }

    // mmm.. ..which one do i need ??
    private function _getEncoder($ascii)
    {
        if ($ascii > 62) {
            return '_encode95';
        }
        if ($ascii > 36) {
            return '_encode62';
        }
        if ($ascii > 10) {
            return '_encode36';
        }
        return '_encode10';
    }

    // zero encoding
    // characters: 0123456789
    private function _encode10($charCode)
    {
        return $charCode;
    }

    // inherent base36 support
    // characters: 0123456789abcdefghijklmnopqrstuvwxyz
    private function _encode36($charCode)
    {
        return base_convert((string)$charCode, 10, 36);
    }

    // hitch a ride on base36 and add the upper case alpha characters
    // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
    private function _encode62($charCode)
    {
        $res = '';
        if ($charCode >= $this->_encoding) {
            $res = $this->_encode62((int)($charCode / $this->_encoding));
        }
        $charCode = $charCode % $this->_encoding;

        if ($charCode > 35) {
            // 36..61 -> 'A'..'Z' (chr(65)..chr(90))
            return $res . chr($charCode + 29);
        }
        return $res . base_convert((string)$charCode, 10, 36);
    }

    // use high-ascii values
    // characters: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
    private function _encode95($charCode)
    {
        $res = '';
        if ($charCode >= $this->_encoding) {
            // integer division: the quotient is used as an integer by "%" below anyway
            $res = $this->_encode95((int)($charCode / $this->_encoding));
        }

        return $res . chr(($charCode % $this->_encoding) + 161);
    }

    // turns a string into a regular expression, escaping the "$" characters
    private function _safeRegExp($string)
    {
        return '/' . preg_replace('/\$/', '\\\$', $string) . '/';
    }

    private function _encodePrivate($charCode)
    {
        return "_" . $charCode;
    }

    // protect characters used by the parser
    private function _escape($script)
    {
        return preg_replace('/([\\\\\'])/', '\\\$1', $script);
    }

    // protect high-ascii characters already in the script
    private function _escape95($script)
    {
        return preg_replace_callback(
            '/[\\xa1-\\xff]/',
            array($this, '_escape95Bis'),
            $script
        );
    }

    // preg_replace_callback() callback: $match[0] is the matched byte
    private function _escape95Bis($match)
    {
        return '\x' . ((string)dechex(ord($match[0])));
    }

    // returns the JavaScript source stored in the constant JSFUNCTION<$aName>,
    // or an empty string when there is no such constant
    private function _getJSFunction($aName)
    {
        $constantName = __CLASS__ . '::JSFUNCTION' . $aName;
        if (defined($constantName)) {
            return constant($constantName);
        }
        return '';
    }

    // JavaScript Functions used.
    // Note : In Dean's version, these functions are converted
    // with 'String(aFunctionName);'.
    // This internal conversion complete the original code, ex :
    // 'while (aBool) anAction();' is converted to
    // 'while (aBool) { anAction(); }'.
    // The JavaScript functions below are corrected.

    // unpacking function - this is the boot strap function
    //  data extracted from this packing routine is passed to
    //  this function when decoded in the target
    // NOTE ! : without the ';' final.
    const JSFUNCTION_unpack =

'function($packed, $ascii, $count, $keywords, $encode, $decode) {
    while ($count--) {
        if ($keywords[$count]) {
            $packed = $packed.replace(new RegExp(\'\\\\b\' + $encode($count) + \'\\\\b\', \'g\'), $keywords[$count]);
        }
    }
    return $packed;
}';

    // code-snippet inserted into the unpacker to speed up decoding
    const JSFUNCTION_decodeBody =
//_decode = function() {
// does the browser support String.replace where the
//  replacement value is a function?

'    if (!\'\'.replace(/^/, String)) {
        // decode all the values we need
        while ($count--) {
            $decode[$encode($count)] = $keywords[$count] || $encode($count);
        }
        // global replacement function
        $keywords = [function ($encoded) {return $decode[$encoded]}];
        // generic match
        $encode = function () {return \'\\\\w+\'};
        // reset the loop counter -  we are now doing a global replace
        $count = 1;
    }
';
//};

    // zero encoding
    // characters: 0123456789
    const JSFUNCTION_encode10 =
'function($charCode) {
    return $charCode;
}';

    // inherent base36 support
    // characters: 0123456789abcdefghijklmnopqrstuvwxyz
    const JSFUNCTION_encode36 =
'function($charCode) {
    return $charCode.toString(36);
}';

    // hitch a ride on base36 and add the upper case alpha characters
    // characters: 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ
    const JSFUNCTION_encode62 =
'function($charCode) {
    return ($charCode < _encoding ? \'\' : arguments.callee(parseInt($charCode / _encoding))) +
    (($charCode = $charCode % _encoding) > 35 ? String.fromCharCode($charCode + 29) : $charCode.toString(36));
}';

    // use high-ascii values
    // characters: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþ
    const JSFUNCTION_encode95 =
'function($charCode) {
    return ($charCode < _encoding ? \'\' : arguments.callee($charCode / _encoding)) +
        String.fromCharCode($charCode % _encoding + 161);
}';
}
/**
 * Multi-pattern parser: every pattern registered with add() becomes one
 * alternative of a single regular expression, which exec() applies to a
 * string in one pass.
 *
 * Patterns must be written as '/.../' without trailing modifiers, because
 * exec() strips exactly the first and the last character of each of them.
 */
class ParseMaster
{
    /** When true, exec() adds the "i" modifier to the combined expression. */
    public $ignoreCase = false;

    /** When not empty, this character plus the character following it are hidden from the patterns. */
    public $escapeChar = '';

    // constants: indexes into one stored pattern (see _add())
    const EXPRESSION = 0;
    const REPLACEMENT = 1;
    const LENGTH = 2;

    // used to determine nesting levels
    private $GROUPS = '/\\(/';//g
    private $SUB_REPLACE = '/\\$\\d/';
    private $INDEXED = '/^\\$\\d+$/';
    private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/';
    private $ESCAPE = '/\\\./';//g
    private $QUOTE = '/\'/';
    private $DELETED = '/\\x01[^\\x01]*\\x01/';//g

    // private
    private $_escaped = array();  // escaped characters
    private $_patterns = array(); // patterns stored by index

    // php : we cannot pass additional data to preg_replace_callback,
    // so the callbacks read what they need from this property
    private $buffer;

    /**
     * Registers a pattern.
     *
     * @param string       $expression  a '/.../' regular expression; an empty value is stored as '/^$/'
     * @param string|array $replacement a literal string,
     *                                  a single reference such as '$2',
     *                                  a string mixing text and references such as 'Hello $2 $1',
     *                                  or array('fn' => methodName[, 'data' => mixed])
     *
     * References are numbered from the pattern itself: '$1' is the whole text
     * matched by the pattern, '$2' its first sub-expression, and so on.
     */
    public function add($expression, $replacement = '')
    {
        // count the number of sub-expressions
        //  - add one because each pattern is itself a sub-expression
        $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out);

        // treat only strings $replacement
        // does the pattern deal with sub-expressions?
        if (is_string($replacement) && preg_match($this->SUB_REPLACE, $replacement)) {
            if (preg_match($this->INDEXED, $replacement)) {
                // a simple lookup (e.g. "$2"):
                // store the index (used for fast retrieval of matched strings)
                $replacement = (int)(substr($replacement, 1)) - 1;
            } else {
                // a complicated lookup (e.g. "Hello $2 $1"):
                // let _backReferences() do the lookup
                $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement))
                    ? '"' : "'";
                $replacement = array(
                    'fn' => '_backReferences',
                    'data' => array(
                        'replacement' => $replacement,
                        'length' => $length,
                        'quote' => $quote
                    )
                );
            }
        }

        // pass the modified arguments
        if (!empty($expression)) {
            $this->_add($expression, $replacement, $length);
        } else {
            $this->_add('/^$/', $replacement, $length);
        }
    }

    /**
     * Applies every registered pattern to a string.
     *
     * @param string $string
     * @return string
     */
    public function exec($string)
    {
        // execute the global replacement
        $this->_escaped = array();

        $string = $this->_escape($string, $this->escapeChar);

        // without any pattern the combined expression would be the invalid regexp "/"
        if (!empty($this->_patterns)) {
            // simulate the _patterns.toString of Dean
            $alternatives = array();
            foreach ($this->_patterns as $reg) {
                $alternatives[] = '(' . substr($reg[self::EXPRESSION], 1, -1) . ')';
            }
            $regexp = '/' . implode('|', $alternatives) . '/';
            $regexp .= ($this->ignoreCase) ? 'i' : '';

            $string = preg_replace_callback(
                $regexp,
                array($this, '_replacement'),
                $string
            );
        }

        $string = $this->_unescape($string, $this->escapeChar);

        // drop the spans that _replacement() kept between \x01 markers
        return preg_replace($this->DELETED, '', $string);
    }

    public function reset()
    {
        // clear the patterns collection so that this object may be re-used
        $this->_patterns = array();
    }

    // create and add a new pattern to the patterns collection:
    // the arguments are stored as array(EXPRESSION, REPLACEMENT, LENGTH)
    private function _add()
    {
        $arguments = func_get_args();
        $this->_patterns[] = $arguments;
    }

    // this is the global replace function (it's quite complicated)
    // $arguments is the match array given by preg_replace_callback()
    private function _replacement($arguments)
    {
        if (empty($arguments)) {
            return '';
        }

        // $i is the index, in $arguments, of the group wrapping the current pattern
        $i = 1;
        foreach ($this->_patterns as $pattern) {
            // do we have a result?
            if (isset($arguments[$i]) && ($arguments[$i] !== '')) {
                $replacement = $pattern[self::REPLACEMENT];

                if (is_array($replacement) && isset($replacement['fn'])) {
                    if (isset($replacement['data'])) {
                        $this->buffer = $replacement['data'];
                    }
                    return call_user_func(array($this, $replacement['fn']), $arguments, $i);
                }

                if (is_int($replacement)) {
                    // trailing groups that did not take part in the match are absent
                    return isset($arguments[$replacement + $i]) ? $arguments[$replacement + $i] : '';
                }

                // when the matched text contains the escape character, it is kept
                // between \x01 markers in front of the replacement, so that
                // _unescape() still meets that character; exec() removes the
                // marked span afterwards
                $delete = ($this->escapeChar == '' ||
                           strpos($arguments[$i], $this->escapeChar) === false)
                    ? '' : "\x01" . $arguments[$i] . "\x01";
                return $delete . $replacement;
            }

            // skip over references to sub-expressions
            $i += $pattern[self::LENGTH];
        }

        return '';
    }

    // replaces '$n' ... '$1' in the stored replacement string, highest index first
    private function _backReferences($match, $offset)
    {
        $replacement = $this->buffer['replacement'];
        for ($i = $this->buffer['length']; $i > 0; $i--) {
            $value = isset($match[$offset + $i - 1]) ? $match[$offset + $i - 1] : '';
            $replacement = str_replace('$' . $i, $value, $replacement);
        }
        return $replacement;
    }

    // replace: $name -> n, $$name -> na
    private function _replace_name($match, $offset)
    {
        $length = strlen($match[$offset + 2]);
        $start = $length - max($length - strlen($match[$offset + 3]), 0);
        return substr($match[$offset + 1], $start, $length) . $match[$offset + 4];
    }

    // looks the matched word up in the dictionary passed as 'data'
    private function _replace_encoded($match, $offset)
    {
        return $this->buffer[$match[$offset]];
    }

    // encode escaped characters: every "escapeChar + one character" pair is
    // reduced to escapeChar alone, the character being stored in $_escaped
    private function _escape($string, $escapeChar)
    {
        if (!$escapeChar) {
            return $string;
        }
        $this->buffer = $escapeChar;
        return preg_replace_callback(
            '/\\' . $escapeChar . '(.)' .'/',
            array($this, '_escapeBis'),
            $string
        );
    }

    private function _escapeBis($match)
    {
        $this->_escaped[] = $match[1];
        return $this->buffer;
    }

    // decode escaped characters: every escapeChar gets the next stored character back
    private function _unescape($string, $escapeChar)
    {
        if (!$escapeChar) {
            return $string;
        }
        $regexp = '/'.'\\'.$escapeChar.'/';
        $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0);
        return preg_replace_callback(
            $regexp,
            array($this, '_unescapeBis'),
            $string
        );
    }

    private function _unescapeBis($match = null)
    {
        $index = $this->buffer['i'];
        // do not test with empty(): the escaped character may be "0"
        if (isset($this->_escaped[$index]) && $this->_escaped[$index] !== '') {
            $temp = $this->_escaped[$index];
        } else {
            $temp = '';
        }
        $this->buffer['i']++;
        return $this->buffer['escapeChar'] . $temp;
    }

    // removes every "backslash + character" pair, so that escaped
    // parentheses and quotes are not counted by add()
    private function _internalEscape($string)
    {
        return preg_replace($this->ESCAPE, '', $string);
    }
}