fixed japanese Wikipedia interwiki link added graphics for other WPs
[dokuwiki.git] / inc / geshi.php
blobabe69a2bd92e28d2ba1475e502449e7ba7be314e
1 <?php
2 /**
3 * GeSHi - Generic Syntax Highlighter
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
10 * directory.
12 * This file is part of GeSHi.
14 * GeSHi is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * GeSHi is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with GeSHi; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28 * @package geshi
29 * @subpackage core
30 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license http://gnu.org/copyleft/gpl.html GNU GPL
37 // GeSHi Constants
38 // You should use these constant names in your programs instead of
39 // their values - you never know when a value may change in a future
40 // version
/** The version of this GeSHi file */
define('GESHI_VERSION', '1.0.8.4');

// Define the root directory for the GeSHi code tree
if (!defined('GESHI_ROOT')) {
    /** The root directory for GeSHi */
    define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
}
/** The language file directory for GeSHi
    @access private */
define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);

// Define if GeSHi should be paranoid about security
if (!defined('GESHI_SECURITY_PARANOID')) {
    /** Tells GeSHi to be paranoid about security settings */
    define('GESHI_SECURITY_PARANOID', false);
}

// Line numbers - use with enable_line_numbers()
/** Use no line numbers when building the result */
define('GESHI_NO_LINE_NUMBERS', 0);
/** Use normal line numbers when building the result */
define('GESHI_NORMAL_LINE_NUMBERS', 1);
/** Use fancy line numbers when building the result */
define('GESHI_FANCY_LINE_NUMBERS', 2);

// Container HTML type
/** Use nothing to surround the source */
define('GESHI_HEADER_NONE', 0);
/** Use a "div" to surround the source */
define('GESHI_HEADER_DIV', 1);
/** Use a "pre" to surround the source */
define('GESHI_HEADER_PRE', 2);
/** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
define('GESHI_HEADER_PRE_VALID', 3);
/**
 * Use a "table" to surround the source:
 *
 *  <table>
 *    <thead><tr><td colspan="2">$header</td></tr></thead>
 *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
 *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
 *  </table>
 *
 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
 * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
 */
define('GESHI_HEADER_PRE_TABLE', 4);

// Capitalisation constants
/** Leave keywords found as the case that they are */
define('GESHI_CAPS_NO_CHANGE', 0);
/** Uppercase keywords found */
define('GESHI_CAPS_UPPER', 1);
/** Lowercase keywords found */
define('GESHI_CAPS_LOWER', 2);

// Link style constants
/** Links in the source in the :link state */
define('GESHI_LINK', 0);
/** Links in the source in the :hover state */
define('GESHI_HOVER', 1);
/** Links in the source in the :active state */
define('GESHI_ACTIVE', 2);
/** Links in the source in the :visited state */
define('GESHI_VISITED', 3);

// Important string starter/finisher
// Note that if you change these, they should be as-is: i.e., don't
// write them as if they had been run through htmlentities()
/** The starter for important parts of the source */
define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
/** The ender for important parts of the source */
define('GESHI_END_IMPORTANT', '<END GeSHi>');

/**#@+
 *  @access private
 */
// When strict mode applies for a language
/** Strict mode never applies (this is the most common) */
define('GESHI_NEVER', 0);
/** Strict mode *might* apply, and can be enabled or
    disabled by {@link GeSHi->enable_strict_mode()} */
define('GESHI_MAYBE', 1);
/** Strict mode always applies */
define('GESHI_ALWAYS', 2);

// Advanced regexp handling constants, used in language files
/** The key of the regex array defining what to search for */
define('GESHI_SEARCH', 0);
/** The key of the regex array defining what bracket group in a
    matched search to use as a replacement */
define('GESHI_REPLACE', 1);
/** The key of the regex array defining any modifiers to the regular expression */
define('GESHI_MODIFIERS', 2);
/** The key of the regex array defining what bracket group in a
    matched search to put before the replacement */
define('GESHI_BEFORE', 3);
/** The key of the regex array defining what bracket group in a
    matched search to put after the replacement */
define('GESHI_AFTER', 4);
/** The key of the regex array defining a custom keyword to use
    for this regexp's html tag class */
define('GESHI_CLASS', 5);

/** Used in language files to mark comments */
define('GESHI_COMMENTS', 0);

/**
 * Used to work around missing PHP features.
 * True only for PHP versions strictly older than 4.3.3; 4.3.3 itself already
 * supports the features this flag guards (e.g. the preg_match offset argument).
 */
define('GESHI_PHP_PRE_433', version_compare(PHP_VERSION, '4.3.3', '<'));
/** make sure we can call stripos **/
if (!function_exists('stripos')) {
    /**
     * Fallback for PHP versions that lack a native stripos().
     *
     * Finds the first case-insensitive occurrence of $needle in $haystack.
     * The offset is applied with substr() rather than preg_match()'s own
     * offset argument, so one implementation serves every PHP version.
     *
     * @param string   $haystack The string to search in
     * @param string   $needle   The string to search for
     * @param int|null $offset   Position in $haystack to start searching from
     * @return int|false Position of the match relative to the start of the
     *                   original $haystack, or false if there is no match
     * @ignore
     */
    function stripos($haystack, $needle, $offset = null) {
        $offset = is_null($offset) ? 0 : (int) $offset;
        if ($offset) {
            $haystack = substr($haystack, $offset);
        }
        // The "i" modifier is what makes this stripos() rather than strpos()
        $pattern = '/' . preg_quote($needle, '/') . '/i';
        if (preg_match($pattern, $haystack, $match, PREG_OFFSET_CAPTURE)) {
            // The match position is relative to the truncated haystack
            return $match[0][1] + $offset;
        }
        return false;
    }
}
/** Some old PHP / PCRE versions only support a limited number of subpatterns
    in regular expressions. Set this to false if your PCRE lib is up to date
    @see GeSHi->optimize_regexp_list()
    */
define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
/** It's also important not to generate too long regular expressions.
    Be generous here... but keep in mind that when reaching this limit we
    still have to close open patterns. 12k should do just fine on a 16k limit.
    @see GeSHi->optimize_regexp_list()
    */
define('GESHI_MAX_PCRE_LENGTH', 12288);

// Number format specification
/** Basic number format for integers */
define('GESHI_NUMBER_INT_BASIC', 1);             // Default integers \d+
/** Enhanced number format for integers like seen in C */
define('GESHI_NUMBER_INT_CSTYLE', 2);            // Default C-Style \d+[lL]?
/** Number format to highlight binary numbers with a suffix "b" */
define('GESHI_NUMBER_BIN_SUFFIX', 16);           // [01]+[bB]
/** Number format to highlight binary numbers with a prefix % */
define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   // %[01]+
/** Number format to highlight binary numbers with a prefix 0b (C) */
define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        // 0b[01]+
/** Number format to highlight octal numbers with a leading zero */
define('GESHI_NUMBER_OCT_PREFIX', 256);          // 0[0-7]+
/** Number format to highlight octal numbers with a suffix of o */
define('GESHI_NUMBER_OCT_SUFFIX', 512);          // [0-7]+[oO]
/** Number format to highlight hex numbers with a prefix 0x */
define('GESHI_NUMBER_HEX_PREFIX', 4096);         // 0x[0-9a-fA-F]+
/** Number format to highlight hex numbers with a suffix of h */
define('GESHI_NUMBER_HEX_SUFFIX', 8192);         // [0-9][0-9a-fA-F]*h
/** Number format to highlight floating-point numbers without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI', 65536);        // \d+\.\d+
/** Number format to highlight floating-point numbers with an "f" suffix, without support for scientific notation */
define('GESHI_NUMBER_FLT_NONSCI_F', 131072);     // \d+(\.\d+)?f
/** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);    // \.\d+e\d+
/** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);     // \d+(\.\d+)?e\d+
// Custom formats are passed by RX array

// Error detection - use these to analyse faults
/** No sourcecode to highlight was specified
 * @deprecated
 */
define('GESHI_ERROR_NO_INPUT', 1);
/** The language specified does not exist */
define('GESHI_ERROR_NO_SUCH_LANG', 2);
/** GeSHi could not open a file for reading (generally a language file) */
define('GESHI_ERROR_FILE_NOT_READABLE', 3);
/** The header type passed to {@link GeSHi->set_header_type()} was invalid */
define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
/** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
/**#@-*/
243 * The GeSHi Class.
245 * Please refer to the documentation for GeSHi 1.0.X that is available
246 * at http://qbnz.com/highlighter/documentation.php for more information
247 * about how to use this class.
249 * @package geshi
250 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
251 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
253 class GeSHi {
254 /**#@+
255 * @access private
258 * The source code to highlight
259 * @var string
261 var $source = '';
264 * The language to use when highlighting
265 * @var string
267 var $language = '';
270 * The data for the language used
271 * @var array
273 var $language_data = array();
276 * The path to the language files
277 * @var string
279 var $language_path = GESHI_LANG_ROOT;
282 * The error code (one of the GESHI_ERROR_* constants) of the last error, or false if there was none
283 * @var int|false
284 * @todo check err reporting works
286 var $error = false;
289 * Possible error messages
290 * @var array
292 var $error_messages = array(
293 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
294 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
295 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
296 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
300 * Whether highlighting is strict or not
301 * @var boolean
303 var $strict_mode = false;
306 * Whether to use CSS classes in output
307 * @var boolean
309 var $use_classes = false;
312 * The type of header to use. Can be one of the following
313 * values:
315 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
316 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
317 * - GESHI_HEADER_NONE: No header is outputted.
319 * @var int
321 var $header_type = GESHI_HEADER_PRE;
324 * Array of permissions for which lexics should be highlighted
325 * @var array
327 var $lexic_permissions = array(
328 'KEYWORDS' => array(),
329 'COMMENTS' => array('MULTI' => true),
330 'REGEXPS' => array(),
331 'ESCAPE_CHAR' => true,
332 'BRACKETS' => true,
333 'SYMBOLS' => false,
334 'STRINGS' => true,
335 'NUMBERS' => true,
336 'METHODS' => true,
337 'SCRIPT' => true
341 * The time it took to parse the code
342 * @var double
344 var $time = 0;
347 * The content of the header block
348 * @var string
350 var $header_content = '';
353 * The content of the footer block
354 * @var string
356 var $footer_content = '';
359 * The style of the header block
360 * @var string
362 var $header_content_style = '';
365 * The style of the footer block
366 * @var string
368 var $footer_content_style = '';
371 * Tells if a block around the highlighted source should be forced
372 * if not using line numbering
373 * @var boolean
375 var $force_code_block = false;
378 * The styles for hyperlinks in the code
379 * @var array
381 var $link_styles = array();
384 * Whether important blocks should be recognised or not
385 * @var boolean
386 * @deprecated
387 * @todo REMOVE THIS FUNCTIONALITY!
389 var $enable_important_blocks = false;
392 * Styles for important parts of the code
393 * @var string
394 * @deprecated
395 * @todo As above - rethink the whole idea of important blocks as it is buggy and
396 * will be hard to implement in 1.2
398 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
401 * Whether CSS IDs should be added to the code
402 * @var boolean
404 var $add_ids = false;
407 * Lines that should be highlighted extra
408 * @var array
410 var $highlight_extra_lines = array();
413 * Styles of lines that should be highlighted extra
414 * @var array
416 var $highlight_extra_lines_styles = array();
419 * Styles of extra-highlighted lines
420 * @var string
422 var $highlight_extra_lines_style = 'background-color: #ffc;';
425 * The line ending
426 * If null, nl2br() will be used on the result string.
427 * Otherwise, all instances of \n will be replaced with $line_ending
428 * @var string
430 var $line_ending = null;
433 * Number at which line numbers should start at
434 * @var int
436 var $line_numbers_start = 1;
439 * The overall style for this code block
440 * @var string
442 var $overall_style = 'font-family:monospace;';
445 * The style for the actual code
446 * @var string
448 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
451 * The overall class for this code block
452 * @var string
454 var $overall_class = '';
457 * The overall ID for this code block
458 * @var string
460 var $overall_id = '';
463 * Line number styles
464 * @var string
466 var $line_style1 = 'font-weight: normal; vertical-align:top;';
469 * Line number styles for fancy lines
470 * @var string
472 var $line_style2 = 'font-weight: bold; vertical-align:top;';
475 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
476 * @var string
478 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
481 * Flag for how line numbers are displayed
482 * @var int
484 var $line_numbers = GESHI_NO_LINE_NUMBERS;
487 * Flag to decide if multi line spans are allowed. Set it to false to make sure
488 * each tag is closed before and reopened after each linefeed.
489 * @var boolean
491 var $allow_multiline_span = true;
494 * The "nth" value for fancy line highlighting
495 * @var int
497 var $line_nth_row = 0;
500 * The size of tab stops
501 * @var int
503 var $tab_width = 8;
506 * Should we use language-defined tab stop widths?
507 * @var boolean
509 var $use_language_tab_width = false;
512 * Default target for keyword links
513 * @var string
515 var $link_target = '';
518 * The encoding to use for entity encoding
519 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
520 * @var string
522 var $encoding = 'utf-8';
525 * Should keywords be linked?
526 * @var boolean
528 var $keyword_links = true;
531 * Currently loaded language file
532 * @var string
533 * @since 1.0.7.22
535 var $loaded_language = '';
538 * Whether the caches needed for parsing are built or not
540 * @var bool
541 * @since 1.0.8
543 var $parse_cache_built = false;
546 * Work around for Suhosin Patch with disabled /e modifier
548 * Note from suhosins author in config file:
549 * <blockquote>
550 * The /e modifier inside <code>preg_replace()</code> allows code execution.
551 * Often it is the cause for remote code execution exploits. It is wise to
552 * deactivate this feature and test where in the application it is used.
553 * The developer using the /e modifier should be made aware that he should
554 * use <code>preg_replace_callback()</code> instead
555 * </blockquote>
557 * @var array
558 * @since 1.0.8
560 var $_kw_replace_group = 0;
561 var $_rx_key = 0;
564 * some "callback parameters" for handle_multiline_regexps
566 * @since 1.0.8
567 * @access private
568 * @var string
570 var $_hmr_before = '';
571 var $_hmr_replace = '';
572 var $_hmr_after = '';
573 var $_hmr_key = 0;
575 /**#@-*/
/**
 * Creates a new GeSHi object, with source and language
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory. <b>This
 *               is deprecated!</b> The path is normally detected
 *               automatically. If you have renamed the language
 *               directory however, you will still need to set the
 *               path using this parameter or
 *               {@link GeSHi->set_language_path()}
 * @since 1.0.0
 */
function __construct($source = '', $language = '', $path = '') {
    // empty() also rejects the string "0", which is perfectly valid source
    if (!empty($source) || '0' === $source) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    // Always called: it is a no-op for an empty path and otherwise
    // re-applies the current language against the new directory
    $this->set_language_path($path);
}

/**
 * Old-style (PHP 4) constructor.
 *
 * Kept so that PHP 4 still finds a constructor and so that existing code
 * calling GeSHi() explicitly (e.g. from a subclass) keeps working; newer
 * PHP versions use __construct() and no longer treat a method named after
 * the class as a constructor.
 *
 * @param string $source   The source code to highlight
 * @param string $language The language to highlight the source with
 * @param string $path     The path to the language file directory (deprecated)
 * @since 1.0.0
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
/**
 * Returns an error message associated with the last GeSHi operation,
 * or false if no error has occurred
 *
 * @return string|false An HTML-formatted error message if there has been
 *                      an error, else false
 * @since 1.0.0
 */
function error() {
    if (!$this->error) {
        return false;
    }

    // Not every error code has a message template (the deprecated
    // GESHI_ERROR_NO_INPUT has none), so fall back to an empty message
    // instead of reading an undefined index.
    $msg = isset($this->error_messages[$this->error])
        ? $this->error_messages[$this->error]
        : '';

    // Template variables that may appear in the message, for debugging
    $debug_tpl_vars = array(
        '{LANGUAGE}' => $this->language,
        '{PATH}' => $this->language_path
    );
    $msg = str_replace(
        array_keys($debug_tpl_vars),
        array_values($debug_tpl_vars),
        $msg);

    return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
}
/**
 * Gets a human-readable language name (thanks to Simon Patterson
 * for the idea :))
 *
 * @return string The name for the current language; suffixed with
 *                " (Unknown Language)" if the language could not be found
 * @since 1.0.2
 */
function get_language_name() {
    // LANG_NAME is only available once language data is present, which is
    // not guaranteed when the language file could not be found.
    $name = isset($this->language_data['LANG_NAME'])
        ? $this->language_data['LANG_NAME']
        : '';

    if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
        return $name . ' (Unknown Language)';
    }
    return $name;
}
/**
 * Replaces the source code held by this object.
 *
 * The list of extra-highlighted lines is cleared as well.
 *
 * @param string $source The source code to highlight
 * @since 1.0.0
 */
function set_source($source) {
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
/**
 * Selects the language used for highlighting and loads its language file.
 *
 * @note since 1.0.8 this function won't reset language-settings by default anymore!
 *       if you need this set $force_reset = true
 *
 * @param string  $language    The name of the language to use
 * @param boolean $force_reset Whether to reload the language file even if
 *                             it is the one currently loaded
 * @since 1.0.0
 */
function set_language($language, $force_reset = false) {
    if ($force_reset) {
        // Forget what was loaded so the "already loaded" shortcut below cannot trigger
        $this->loaded_language = false;
    }

    // Strip everything but [a-zA-Z0-9-_] to prevent malicious code injection,
    // then normalise the case
    $language = strtolower(preg_replace('#[^a-zA-Z0-9\-_]#', '', $language));

    // Full filename of the language file
    $file_name = $this->language_path . $language . '.php';
    if ($file_name == $this->loaded_language) {
        // this language is already loaded!
        return;
    }

    $this->language = $language;
    $this->error = false;
    $this->strict_mode = GESHI_NEVER;

    if (is_readable($file_name)) {
        // Load the language for parsing
        $this->load_language($file_name);
    } else {
        $this->error = GESHI_ERROR_NO_SUCH_LANG;
    }
}
/**
 * Sets the path to the directory containing the language files. Note
 * that this path is relative to the directory of the script that included
 * geshi.php, NOT geshi.php itself.
 *
 * Paths that fail any of the security checks below are silently ignored,
 * as is an empty path.
 *
 * @param string $path The path to the language directory
 * @since 1.0.0
 * @deprecated The path to the language files should now be automatically
 *             detected, so this method should no longer be needed. The
 *             1.1.X branch handles manual setting of the path differently
 *             so this method will disappear in 1.2.0.
 */
function set_language_path($path) {
    // Compare against false explicitly: strpos() returns 0 for a leading
    // colon, which a plain truthiness test would mistake for "no colon".
    if (false !== strpos($path, ':')) {
        //Security Fix to prevent external directories using fopen wrappers.
        if (DIRECTORY_SEPARATOR == "\\") {
            // On Windows only a single drive-letter colon ("C:") is acceptable
            if (!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
                return;
            }
        } else {
            return;
        }
    }
    if (preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if (GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
        //Security Fix to prevent external directories using fopen wrappers.
        return;
    }
    if ($path) {
        // Make sure the stored path ends with a slash
        $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
        $this->set_language($this->language); // otherwise set_language_path has no effect
    }
}
/**
 * Sets the type of header to be used.
 *
 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
 * means more source code but more control over tab width and line-wrapping.
 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 * control. Default is GESHI_HEADER_PRE.
 *
 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 * should be outputted.
 *
 * An unknown type leaves the current header type untouched and records
 * GESHI_ERROR_INVALID_HEADER_TYPE.
 *
 * @param int $type The type of header to be used
 * @since 1.0.0
 */
function set_header_type($type) {
    $known_types = array(
        GESHI_HEADER_NONE,
        GESHI_HEADER_DIV,
        GESHI_HEADER_PRE,
        GESHI_HEADER_PRE_VALID,
        GESHI_HEADER_PRE_TABLE
    );

    if (in_array($type, $known_types)) {
        $this->header_type = $type;
    } else {
        $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
    }
}
/**
 * Sets the styles for the code block that is produced when this object
 * is parsed. The style should be a string of valid stylesheet declarations.
 *
 * @param string  $style             The overall style for the outputted code block
 * @param boolean $preserve_defaults Whether to append to the current styles
 *                                   instead of replacing them
 * @since 1.0.0
 */
function set_overall_style($style, $preserve_defaults = false) {
    $this->overall_style = $preserve_defaults
        ? $this->overall_style . $style
        : $style;
}
/**
 * Stores the overall class name for this block of code, so that a
 * stylesheet can target this object's output.
 *
 * @param string $class The class name to use for this block of code
 * @since 1.0.0
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}
/**
 * Stores the overall ID for this block of code, so that a stylesheet
 * can target this object's output.
 *
 * @param string $id The ID to use for this block of code
 * @since 1.0.0
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}
/**
 * Switches the use of CSS classes for highlighting on or off. Classes are
 * off by default; calling this method without arguments turns them on.
 *
 * @param boolean $flag Whether to turn classes on or not
 * @since 1.0.0
 */
function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
/**
 * Sets the style for the actual code. This should be a string
 * containing valid stylesheet declarations. If $preserve_defaults is
 * true, the new declarations are appended to the current ones, so the
 * user defined styles take priority.
 *
 * Note: Use this method to override any style changes you made to
 * the line numbers if you are using line numbers, else the line of
 * code will have the same style as the line number! Consult the
 * GeSHi documentation for more information about this.
 *
 * @param string  $style             The style to use for actual code
 * @param boolean $preserve_defaults Whether to merge the current styles with the new styles
 * @since 1.0.2
 */
function set_code_style($style, $preserve_defaults = false) {
    $this->code_style = $preserve_defaults
        ? $this->code_style . $style
        : $style;
}
/**
 * Sets the styles for the line numbers.
 *
 * Can be called as (style1, style2, preserve) or as (style1, preserve);
 * in the two-argument form the "fancy" style is treated as an empty string.
 *
 * @param string         $style1            The style for the line numbers that are "normal"
 * @param string|boolean $style2            If a string, the style of the "fancy" line
 *                                          numbers; if boolean, it is taken as the
 *                                          $preserve_defaults flag instead
 * @param boolean        $preserve_defaults Whether to append to the current styles
 *                                          instead of replacing them
 * @since 1.0.2
 */
function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
    // Two-argument form: the second argument is really the merge flag
    if (is_bool($style2)) {
        $preserve_defaults = $style2;
        $style2 = '';
    }

    if ($preserve_defaults) {
        $this->line_style1 .= $style1;
        $this->line_style2 .= $style2;
        return;
    }

    $this->line_style1 = $style1;
    $this->line_style2 = $style2;
}
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which lines
 * are to be fancy. For example, if the value of this parameter is 5 then every
 * 5th line will be fancy.
 *
 * An invalid flag records GESHI_ERROR_INVALID_LINE_NUMBER_TYPE and leaves
 * the current settings untouched, mirroring set_header_type().
 *
 * @param int $flag    How line numbers should be displayed
 * @param int $nth_row Defines which lines are fancy
 * @since 1.0.0
 */
function enable_line_numbers($flag, $nth_row = 5) {
    if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
        && GESHI_FANCY_LINE_NUMBERS != $flag) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
        // Do not store a flag we have just reported as invalid
        return;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
/**
 * Sets whether spans and other HTML markup generated by GeSHi may
 * span over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean $flag Whether multiline spans are allowed or not
 * @since 1.0.7.22
 */
function enable_multiline_span($flag) {
    $this->allow_multiline_span = $flag ? true : false;
}
/**
 * Returns the current multiline span setting, see GeSHi->enable_multiline_span().
 *
 * @see enable_multiline_span
 * @return bool Whether markup may span several lines
 */
function get_multiline_span() {
    return $this->allow_multiline_span;
}
/**
 * Sets the style for a keyword group. If $preserve_defaults is
 * true, the new style is appended to the group's current style, so the
 * user defined styles take priority.
 *
 * A group that has no highlighting permission yet gets highlighting
 * switched on.
 *
 * @param int     $key               The key of the keyword group to change the styles of
 * @param string  $style             The style to make the keywords
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 * @since 1.0.0
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    // Only append when there is something to append to; a group without a
    // style yet is simply assigned, which avoids reading an undefined index.
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    //Update the lexic permissions
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
/**
 * Switches highlighting for a keyword group on or off
 *
 * @param int     $key  The key of the keyword group to turn on or off
 * @param boolean $flag Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
/**
 * Sets the styles for comment groups. If $preserve_defaults is
 * true, the new style is appended to the group's current style, so the
 * user defined styles take priority.
 *
 * @param int     $key               The key of the comment group to change the styles of
 * @param string  $style             The style to make the comments
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 * @since 1.0.0
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
        return;
    }
    $this->language_data['STYLES']['COMMENTS'][$key] = $style;
}
/**
 * Switches highlighting for a comment group on or off
 *
 * @param int     $key  The key of the comment group to turn on or off
 * @param boolean $flag Whether to turn highlighting for that group on or off
 * @since 1.0.0
 */
function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
/**
 * Sets the styles for escaped characters. If $preserve_defaults is
 * true, the new style is appended to the group's current style, so the
 * user defined styles take priority.
 *
 * @param string  $style             The style to make the escape characters
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 * @param int     $group             The escape character group the style applies to
 * @since 1.0.0
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
        return;
    }
    $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
}
/**
 * Switches highlighting for escaped characters on or off
 *
 * @param boolean $flag Whether to turn highlighting for escape characters on or off
 * @since 1.0.0
 */
function set_escape_characters_highlighting($flag = true) {
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
/**
 * Sets the styles for brackets. If $preserve_defaults is
 * true, the new style is appended to the current style, so the
 * user defined styles take priority.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string  $style             The style to make the brackets
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 * @since 1.0.0
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
        return;
    }
    $this->language_data['STYLES']['BRACKETS'][0] = $style;
}
/**
 * Switches highlighting for brackets on or off
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean $flag Whether to turn highlighting for brackets on or off
 * @since 1.0.0
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
/**
 * Sets the styles for symbols. If $preserve_defaults is
 * true, the new style is appended to the group's current style, so the
 * user defined styles take priority.
 *
 * For group 0 the same style is also applied to brackets.
 *
 * @param string  $style             The style to make the symbols
 * @param boolean $preserve_defaults Whether to merge the new styles with the old or just
 *                                   to overwrite them
 * @param int     $group             Tells the group of symbols for which style should be set.
 * @since 1.0.1
 */
function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of symbols
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
    }

    // For backward compatibility, group 0 also drives the bracket style
    if (0 == $group) {
        $this->set_brackets_style($style, $preserve_defaults);
    }
}
/**
 * Turns highlighting on/off for symbols. The same setting is forwarded to
 * the (deprecated) bracket highlighting switch.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 * @since 1.0.0
 */
function set_symbols_highlighting($flag) {
    // Update lexic permissions for symbols
    $this->lexic_permissions['SYMBOLS'] = (bool) $flag;

    // For backward compatibility
    $this->set_brackets_highlighting($flag);
}
/**
 * Sets the style for strings (string style group 0).
 *
 * When $preserve_defaults is true the given style is appended to the
 * style already stored, with the user defined styles having priority;
 * otherwise the stored style is replaced.
 *
 * @param string  The style to make the strings
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_strings_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['STRINGS'][0] .= $style;
        return;
    }

    $this->language_data['STYLES']['STRINGS'][0] = $style;
}
/**
 * Turns highlighting on/off for strings
 *
 * @param boolean Whether to turn highlighting for strings on or off
 * @since 1.0.0
 */
function set_strings_highlighting($flag) {
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
/**
 * Sets the style for one group of strict-mode script blocks.
 *
 * When $preserve_defaults is true the given style is appended to the
 * style already stored for the group, with the user defined styles having
 * priority; otherwise the stored style is replaced.
 *
 * @param string  The style to make the script blocks
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @param int     Tells the group of script blocks for which style should be set.
 * @since 1.0.8.4
 */
function set_script_style($style, $preserve_defaults = false, $group = 0) {
    // Update the style of the script block group
    if ($preserve_defaults) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
        return;
    }

    $this->language_data['STYLES']['SCRIPT'][$group] = $style;
}
/**
 * Sets the style for numbers (number style group 0).
 *
 * When $preserve_defaults is true the given style is appended to the
 * style already stored, with the user defined styles having priority;
 * otherwise the stored style is replaced.
 *
 * @param string  The style to make the numbers
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_numbers_style($style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['NUMBERS'][0] .= $style;
        return;
    }

    $this->language_data['STYLES']['NUMBERS'][0] = $style;
}
/**
 * Turns highlighting on/off for numbers
 *
 * @param boolean Whether to turn highlighting for numbers on or off
 * @since 1.0.0
 */
function set_numbers_highlighting($flag) {
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
/**
 * Sets the style for methods.
 *
 * $key is a number that references the appropriate "object splitter" -
 * see the language file for the language you are highlighting to get this
 * number. When $preserve_defaults is true the given style is appended to
 * the style already stored for that key, with the user defined styles
 * having priority; otherwise the stored style is replaced.
 *
 * @param int     The key of the object splitter to change the styles of
 * @param string  The style to make the methods
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
        return;
    }

    $this->language_data['STYLES']['METHODS'][$key] = $style;
}
/**
 * Turns highlighting on/off for methods
 *
 * @param boolean Whether to turn highlighting for methods on or off
 * @since 1.0.0
 */
function set_methods_highlighting($flag) {
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
/**
 * Sets the style for one regular expression group.
 *
 * When $preserve_defaults is true the given style is appended to the
 * style already stored for the group, with the user defined styles having
 * priority; otherwise the stored style is replaced.
 *
 * @param int     The key of the regular expression group to change the style of
 * @param string  The style to make the regular expression matches
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @since 1.0.0
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    if ($preserve_defaults) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
        return;
    }

    $this->language_data['STYLES']['REGEXPS'][$key] = $style;
}
/**
 * Turns highlighting on/off for one regular expression group
 *
 * @param int     The key of the regular expression group to turn on or off
 * @param boolean Whether to turn highlighting for the regular expression group on or off
 * @since 1.0.0
 */
function set_regexps_highlighting($key, $flag) {
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
/**
 * Sets whether a set of keywords are checked for in a case sensitive manner
 *
 * @param int     The key of the keyword group to change the case sensitivity of
 * @param boolean Whether to check in a case sensitive manner or not
 * @since 1.0.0
 */
function set_case_sensitivity($key, $case) {
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * Any other value leaves the current setting untouched.
 *
 * @param int A constant specifying what to do with matched keywords
 * @since 1.0.1
 */
function set_case_keywords($case) {
    $accepted = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);

    if (!in_array($case, $accepted)) {
        return;
    }

    $this->language_data['CASE_KEYWORDS'] = $case;
}
/**
 * Sets how many spaces a tab is substituted for
 *
 * The value is converted to an integer; widths below one are replaced by
 * the default of 8.
 *
 * @param int The tab width
 * @since 1.0.0
 */
function set_tab_width($width) {
    $requested = intval($width);

    // A tab has to be at least one column wide, otherwise use the default
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
/**
 * Sets whether or not to use the tab-stop width specified by the language
 *
 * @param boolean Whether to use language-specific tab-stop widths
 * @since 1.0.7.20
 */
function set_use_language_tab_width($use) {
    $this->use_language_tab_width = $use ? true : false;
}
/**
 * Returns the tab width to use, based on the current language and user
 * preference: the language's TAB_WIDTH when language tab widths are
 * enabled and the language defines one, the user-set width otherwise.
 *
 * @return int Tab width
 * @since 1.0.7.20
 */
function get_real_tab_width() {
    if ($this->use_language_tab_width && isset($this->language_data['TAB_WIDTH'])) {
        return $this->language_data['TAB_WIDTH'];
    }

    return $this->tab_width;
}
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The call only has an effect when the language declares
 * STRICT_MODE_APPLIES as GESHI_MAYBE.
 *
 * @param boolean Whether to enable strict mode or not
 * @since 1.0.0
 */
function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }

    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
/**
 * Disables all highlighting. Shorthand for enable_highlighting(false).
 *
 * @since 1.0.0
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    $enable = false;
    $this->enable_highlighting($enable);
}
/**
 * Enables all highlighting
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting. Every entry of the
 * lexic permissions (including the per-group entries of nested arrays) and
 * the important-blocks switch are set to the flag.
 *
 * @since 1.0.0
 * @param boolean A flag specifying whether to enable or disable all highlighting
 */
function enable_highlighting($flag = true) {
    $enabled = (bool) $flag;

    foreach ($this->lexic_permissions as $type => $current) {
        if (!is_array($current)) {
            $this->lexic_permissions[$type] = $enabled;
            continue;
        }

        // Grouped permissions: switch every group individually
        foreach ($current as $group => $unused) {
            $this->lexic_permissions[$type][$group] = $enabled;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $enabled;
}
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found
 *
 * The lookup table maps a language name to the list of extensions that
 * select it; the first language (in table order) listing the extension
 * wins. The built-in table is used when $lookup is empty or not an array.
 *
 * @param string The extension to get a language name for
 * @param array  A lookup array to use instead of the default one
 * @return string The language name, or '' when the extension is unknown
 * @since 1.0.5
 * @todo Re-think about how this method works (maybe make it private and/or make it
 *       a extension->lang lookup?)
 * @todo static?
 */
function get_language_name_from_extension( $extension, $lookup = array() ) {
    $need_default = empty($lookup) || !is_array($lookup);

    if ($need_default) {
        $lookup = array(
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'd' => array('d'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mpasm' => array(),
            'mysql' => array('sql'),
            'nsis' => array(),
            'objc' => array(),
            'oobas' => array(),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'vb' => array('bas'),
            'vbnet' => array(),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // First language whose extension list contains the extension wins
    $language = '';
    foreach ($lookup as $candidate => $known_extensions) {
        if (in_array($extension, $known_extensions)) {
            $language = $candidate;
            break;
        }
    }

    return $language;
}
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * If the file is not readable, or reading it fails, the error state is
 * set to GESHI_ERROR_FILE_NOT_READABLE and neither source nor language
 * are changed.
 *
 * @param string The filename to load the source from
 * @param array  A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 * @since 1.0.5
 */
function load_from_file($file_name, $lookup = array()) {
    if (!is_readable($file_name)) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    // is_readable() is also true for directories and the file may vanish
    // in between, so the read itself has to be checked as well
    $contents = file_get_contents($file_name);
    if (false === $contents) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }
    $this->set_source($contents);

    // Only the last path component may provide the extension: searching
    // the whole path for the last dot would turn "dir.d/file" into the
    // extension "d/file". Files without an extension yield ''.
    $extension = pathinfo($file_name, PATHINFO_EXTENSION);
    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
/**
 * Adds a keyword to a keyword group for highlighting
 *
 * A keyword that is already part of the group is not added again. If the
 * group does not exist yet, an empty word list is created for it first.
 * NOTE(review): a group created this way has no style, case-sensitivity
 * or permission entries - use add_keyword_group() to create a complete
 * group.
 *
 * @param int    The key of the keyword group to add the keyword to
 * @param string The word to add to the keyword group
 * @since 1.0.0
 */
function add_keyword($key, $word) {
    // Guard against unknown groups: in_array() needs an array to search
    if (!isset($this->language_data['KEYWORDS'][$key]) ||
        !is_array($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    // Without a parse cache there is nothing to keep in sync
    if (!$this->parse_cache_built) {
        return;
    }

    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key]) ||
        !is_array($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        // No cached list to append to (the group was skipped when the
        // cache was built, or is new): compile the group from scratch
        // instead of writing to a bogus "-1" cache entry.
        $this->optimize_keyword_group($key);
        return;
    }

    //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
    $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
    $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
}
/**
 * Removes a keyword from a keyword group
 *
 * Nothing happens when the word is not part of the group.
 *
 * @param int    The key of the keyword group to remove the keyword from
 * @param string The word to remove from the keyword group
 * @param bool   Whether to automatically recompile the optimized regexp list or not.
 *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
 *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
 *               or the removed keyword will stay in cache and still be highlighted! On the other hand
 *               it might be too expensive to recompile the regexp list for every removal if you want to
 *               remove a lot of keywords.
 * @since 1.0.0
 */
function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if (false === $position) {
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($this->parse_cache_built && $recompile) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Creates a new keyword group
 *
 * The group is enabled for highlighting right away. An empty word list is
 * rejected: the method returns false and changes nothing.
 *
 * @param int     The key of the keyword group to create
 * @param string  The styles for the keyword group
 * @param boolean Whether the keyword group is case sensitive or not
 * @param array   The words to use for the keyword group
 * @since 1.0.0
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $word_list = (array) $words;

    // empty word lists mess up highlighting
    if (0 === count($word_list)) {
        return false;
    }

    //Add the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $word_list;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->lexic_permissions['KEYWORDS'][$key] = true;

    //NEW in 1.0.8, cache keyword regexp
    if ($this->parse_cache_built) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Removes a keyword group: its words, permission, case-sensitivity flag,
 * style and cached regexp list.
 *
 * @param int The key of the keyword group to remove
 * @since 1.0.0
 */
function remove_keyword_group ($key) {
    //Remove the keyword group internally
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key]
    );

    //NEW in 1.0.8
    unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
}
/**
 * Compiles the optimized regexp list for a keyword group and stores it in
 * the keyword cache.
 *
 * If the language's parser control sets SPACE_AS_WHITESPACE - for all
 * keywords or, taking precedence, for this group - every space in the
 * compiled expressions is replaced by "\s+".
 *
 * @param int The key of the keyword group to compile & optimize
 * @since 1.0.8
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // Global setting first, the group specific one overrides it
    $flexible_spaces = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $flexible_spaces = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $flexible_spaces = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$flexible_spaces) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $index => $expression) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$index] =
            str_replace(" ", "\\s+", $expression);
    }
}
/**
 * Sets the content of the header block
 *
 * @param string The content of the header block
 * @since 1.0.2
 */
function set_header_content($content) {
    // Stored as given
    $this->header_content = $content;
}
/**
 * Sets the content of the footer block
 *
 * @param string The content of the footer block
 * @since 1.0.2
 */
function set_footer_content($content) {
    // Stored as given
    $this->footer_content = $content;
}
/**
 * Sets the style for the header content
 *
 * @param string The style for the header content
 * @since 1.0.2
 */
function set_header_content_style($style) {
    // Stored as given
    $this->header_content_style = $style;
}
/**
 * Sets the style for the footer content
 *
 * @param string The style for the footer content
 * @since 1.0.2
 */
function set_footer_content_style($style) {
    // Stored as given
    $this->footer_content_style = $style;
}
/**
 * Sets whether to force a surrounding block around
 * the highlighted code or not
 *
 * @param boolean Tells whether to enable or disable this feature
 * @since 1.0.7.20
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
/**
 * Sets the base URL to be used for keywords
 *
 * @param int    The key of the keyword group to set the URL for
 * @param string The URL to set for the group. If {FNAME} is in
 *               the url somewhere, it is replaced by the keyword
 *               that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url) {
    // One URL template per keyword group
    $this->language_data['URLS'][$group] = $url;
}
/**
 * Sets styles for links in code
 *
 * @param int    A constant that specifies what state the style is being
 *               set for - e.g. :hover or :visited
 * @param string The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles) {
    // One style string per link state
    $this->link_styles[$type] = $styles;
}
/**
 * Sets the target for links in code
 *
 * The value is stored as a ready-to-use ' target="..."' attribute string;
 * an empty (falsy) target clears it.
 *
 * @param string The target for links in the code, e.g. _blank
 * @since 1.0.3
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
/**
 * Sets styles for important parts of the code
 *
 * @param string The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    // Stored as given
    $this->important_styles = $styles;
}
/**
 * Sets whether context-important blocks are highlighted
 *
 * @param boolean Tells whether to enable or disable highlighting of important blocks
 * @todo Remove this feature from GeSHi
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
/**
 * Whether CSS IDs should be added to each line
 *
 * @param boolean If true, IDs will be added to each line.
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
/**
 * Specifies which lines to highlight extra
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  An array of line numbers to highlight, or just a line
 *               number on its own.
 * @param string A string specifying the style to use for this line.
 *               If null is specified, the default style is used.
 *               If false is specified, the line will be removed from
 *               special highlighting
 * @since 1.0.2
 * @todo  Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Split up the job using single lines at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    //Mark the line as being highlighted specially
    $line_number = intval($lines);
    $this->highlight_extra_lines[$line_number] = $line_number;

    //Decide on which style to use
    if (false === $style) {
        //Remove this line from special highlighting again
        unset($this->highlight_extra_lines[$line_number]);
        unset($this->highlight_extra_lines_styles[$line_number]);
    } elseif (null === $style) {
        //Default style: drop any line-specific style
        unset($this->highlight_extra_lines_styles[$line_number]);
    } else {
        $this->highlight_extra_lines_styles[$line_number] = $style;
    }
}
/**
 * Sets the style for extra-highlighted lines
 *
 * @param string The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    // Stored as given
    $this->highlight_extra_lines_style = $styles;
}
/**
 * Sets the line-ending
 *
 * @param string The new line-ending
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    // Always kept as a string
    $this->line_ending = strval($line_ending);
}
/**
 * Sets what number line numbers should start at. Should
 * be a positive integer, and will be converted to one
 * (integer conversion followed by taking the absolute value).
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $start = intval($number);
    $this->line_numbers_start = abs($start);
}
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support. The name is stored in lower case; an empty (falsy) value
 * leaves the current encoding untouched.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons, but
 * I doubt it.
 *
 * @param string The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean If true, links will be added to keywords
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
/**
 * Setup caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed
 *
 * Only number highlighting is prepared here: the language's NUMBERS
 * setting is turned into NUMBERS_CACHE, which maps a style group to the
 * number flags highlighted with that group's style. Number styles that
 * are keyed by flag value are re-keyed by bit position along the way.
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    //Nothing to prepare while number highlighting is switched off
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    //First check what way highlighting information for numbers are given
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    //An array is taken over as the cache without further processing
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();

    //No flags given: fall back to basic integers and non-scientific floats
    if (!$this->language_data['NUMBERS']) {
        $this->language_data['NUMBERS'] = GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI;
    }

    //Walk the bitfield from the lowest bit upwards
    $remaining = $this->language_data['NUMBERS'];
    $bit = 0;
    while ($remaining > 0) {
        $flag = 1 << $bit;

        //Rearrange style indices if required ...
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$flag];
            unset($this->language_data['STYLES']['NUMBERS'][$flag]);
        }

        //Check if this bit is set for highlighting
        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
                //The flag has a style of its own
                $this->language_data['NUMBERS_CACHE'][$bit] = $flag;
            } else {
                //No own style: the flag joins group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $flag;
            }
        }

        ++$bit;
        $remaining >>= 1;
    }
}
/**
 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
 * This function makes stylesheet generators much faster as they do not need these caches.
 *
 * Four caches are prepared: the symbol lookup table and search expression,
 * the optimized keyword regexp lists, the bracket match/replace tables and
 * the number regexps. Afterwards the parse cache is flagged as built.
 *
 * @since 1.0.8
 * @access private
 */
function build_parse_cache() {
    // cache symbol regexp
    //As this is a costy operation, we avoid doing it for multiple groups ...
    //Instead we perform it for all symbols at once.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
        $this->language_data['SYMBOL_DATA'] = array();

        $multi_char_patterns = array();  // multi char symbols
        $single_char_patterns = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $group => $entry) {
            //A group is either a list of symbols (styled with its own key)
            //or one plain symbol (styled with group 0)
            if (is_array($entry)) {
                $style_group = $group;
                $members = $entry;
            } else {
                $style_group = 0;
                $members = array($entry);
            }

            foreach ($members as $symbol) {
                $symbol = $this->hsc($symbol);
                if (isset($this->language_data['SYMBOL_DATA'][$symbol])) {
                    //First definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$symbol] = $style_group;

                if (isset($symbol[1])) {
                    $multi_char_patterns[] = preg_quote($symbol, '/');
                } elseif ($symbol == '-') {
                    // don't trigger range out of order error
                    $single_char_patterns[] = '\-';
                } else {
                    $single_char_patterns[] = preg_quote($symbol, '/');
                }
            }
        }

        //SYMBOL_DATA now maps each possible symbol to its style group, so
        //the correct style can be looked up the moment we highlight.
        //What is left is the search expression matching any of the symbols.
        $alternatives = array();
        if (!empty($multi_char_patterns)) {
            rsort($multi_char_patterns);
            $alternatives[] = implode('|', $multi_char_patterns);
        }
        if (!empty($single_char_patterns)) {
            rsort($single_char_patterns);
            $alternatives[] = '[' . implode('', $single_char_patterns) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $alternatives);
    }

    // cache optimized regexp for keyword matching
    // remove old cache
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $group) {
        $switched_off = isset($this->lexic_permissions['KEYWORDS'][$group]) &&
            !$this->lexic_permissions['KEYWORDS'][$group];
        if (!$switched_off) {
            $this->optimize_keyword_group($group);
        }
    }

    // brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        //Inline style when classes are off and a bracket style exists,
        //the br0 class otherwise
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $attribute = 'style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $attribute = 'class="br0"';
        }

        //Entities in the same order as CACHE_BRACKET_MATCH
        $replacements = array();
        foreach (array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;') as $entity) {
            $replacements[] = '<| ' . $attribute . '>' . $entity . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $replacements;
    }

    //Build the parse cache needed to highlight numbers appropriate
    if ($this->lexic_permissions['NUMBERS']) {
        //Check if the style rearrangements have been processed ...
        //This also does some preprocessing to check which style groups are useable ...
        if (!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        //Number format specification
        //All this formats are matched case-insensitively!
        static $number_patterns = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?!(?m:$)))',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
        );

        //NUMBERS_CACHE holds, per style group, either a bitfield of number
        //flags or a string denoting a regexp.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach ($this->language_data['NUMBERS_CACHE'] as $style_group => $spec) {
            if (is_string($spec)) {
                $alternation = $spec;
            } else {
                //This is a bitfield of number flags to highlight:
                //collect the pattern of every set flag and join them
                $selected = array();
                for ($flag = 1; $flag <= $spec; $flag <<= 1) {
                    if ($spec & $flag) {
                        $selected[] = $number_patterns[$flag];
                    }
                }
                $alternation = implode("|", $selected);
            }

            $this->language_data['NUMBERS_RXCACHE'][$style_group] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($alternation)(?!\|>)(?![^\"\|\>\<]+<)/i";
        }
    }

    $this->parse_cache_built = true;
}
2023 * Returns the code in $this->source, highlighted and surrounded by the
2024 * necessary HTML.
2026 * This should only be called ONCE, cos it's SLOW! If you want to highlight
2027 * the same source multiple times, you're better off doing a whole lot of
2028 * str_replaces to replace the &lt;span&gt;s
2030 * @since 1.0.0
2032 function parse_code () {
2033 // Start the timer
2034 $start_time = microtime();
2036 // Firstly, if there is an error, we won't highlight
2037 if ($this->error) {
2038 //Escape the source for output
2039 $result = $this->hsc($this->source);
2041 //This fix is related to SF#1923020, but has to be applied regardless of
2042 //actually highlighting symbols.
2043 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2045 // Timing is irrelevant
2046 $this->set_time($start_time, $start_time);
2047 $this->finalise($result);
2048 return $result;
2051 // make sure the parse cache is up2date
2052 if (!$this->parse_cache_built) {
2053 $this->build_parse_cache();
2056 // Replace all newlines to a common form.
2057 $code = str_replace("\r\n", "\n", $this->source);
2058 $code = str_replace("\r", "\n", $code);
2060 // Add spaces for regular expression matching and line numbers
2061 // $code = "\n" . $code . "\n";
2063 // Initialise various stuff
2064 $length = strlen($code);
2065 $COMMENT_MATCHED = false;
2066 $stuff_to_parse = '';
2067 $endresult = '';
2069 // "Important" selections are handled like multiline comments
2070 // @todo GET RID OF THIS SHIZ
2071 if ($this->enable_important_blocks) {
2072 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2075 if ($this->strict_mode) {
2076 // Break the source into bits. Each bit will be a portion of the code
2077 // within script delimiters - for example, HTML between < and >
2078 $k = 0;
2079 $parts = array();
2080 $matches = array();
2081 $next_match_pointer = null;
2082 // we use a copy to unset delimiters on demand (when they are not found)
2083 $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2084 $i = 0;
2085 while ($i < $length) {
2086 $next_match_pos = $length + 1; // never true
2087 foreach ($delim_copy as $dk => $delimiters) {
2088 if(is_array($delimiters)) {
2089 foreach ($delimiters as $open => $close) {
2090 // make sure the cache is setup properly
2091 if (!isset($matches[$dk][$open])) {
2092 $matches[$dk][$open] = array(
2093 'next_match' => -1,
2094 'dk' => $dk,
2096 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2097 'open_strlen' => strlen($open),
2099 'close' => $close,
2100 'close_strlen' => strlen($close),
2103 // Get the next little bit for this opening string
2104 if ($matches[$dk][$open]['next_match'] < $i) {
2105 // only find the next pos if it was not already cached
2106 $open_pos = strpos($code, $open, $i);
2107 if ($open_pos === false) {
2108 // no match for this delimiter ever
2109 unset($delim_copy[$dk][$open]);
2110 continue;
2112 $matches[$dk][$open]['next_match'] = $open_pos;
2114 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2115 //So we got a new match, update the close_pos
2116 $matches[$dk][$open]['close_pos'] =
2117 strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2119 $next_match_pointer =& $matches[$dk][$open];
2120 $next_match_pos = $matches[$dk][$open]['next_match'];
2123 } else {
2124 //So we should match an RegExp as Strict Block ...
2126 * The value in $delimiters is expected to be an RegExp
2127 * containing exactly 2 matching groups:
2128 * - Group 1 is the opener
2129 * - Group 2 is the closer
2131 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2132 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2133 //We got a match ...
2134 if(isset($matches_rx['start']) && isset($matches_rx['end']))
2136 $matches[$dk] = array(
2137 'next_match' => $matches_rx['start'][1],
2138 'dk' => $dk,
2140 'close_strlen' => strlen($matches_rx['end'][0]),
2141 'close_pos' => $matches_rx['end'][1],
2143 } else {
2144 $matches[$dk] = array(
2145 'next_match' => $matches_rx[1][1],
2146 'dk' => $dk,
2148 'close_strlen' => strlen($matches_rx[2][0]),
2149 'close_pos' => $matches_rx[2][1],
2152 } else {
2153 // no match for this delimiter ever
2154 unset($delim_copy[$dk]);
2155 continue;
2158 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2159 $next_match_pointer =& $matches[$dk];
2160 $next_match_pos = $matches[$dk]['next_match'];
2165 // non-highlightable text
2166 $parts[$k] = array(
2167 1 => substr($code, $i, $next_match_pos - $i)
2169 ++$k;
2171 if ($next_match_pos > $length) {
2172 // out of bounds means no next match was found
2173 break;
2176 // highlightable code
2177 $parts[$k][0] = $next_match_pointer['dk'];
2179 //Only combine for non-rx script blocks
2180 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2181 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2182 $i = $next_match_pos + $next_match_pointer['open_strlen'];
2183 while (true) {
2184 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2185 if ($close_pos == false) {
2186 break;
2188 $i = $close_pos + $next_match_pointer['close_strlen'];
2189 if ($i == $length) {
2190 break;
2192 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2193 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2194 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2195 foreach ($matches as $submatches) {
2196 foreach ($submatches as $match) {
2197 if ($match['next_match'] == $i) {
2198 // a different block already matches here!
2199 break 3;
2203 } else {
2204 break;
2207 } else {
2208 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2209 $i = $close_pos;
2212 if ($close_pos === false) {
2213 // no closing delimiter found!
2214 $parts[$k][1] = substr($code, $next_match_pos);
2215 ++$k;
2216 break;
2217 } else {
2218 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2219 ++$k;
2222 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2223 $num_parts = $k;
2225 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2226 // when we have only one part, we don't have anything to highlight at all.
2227 // if we have a "maybe" strict language, this should be handled as highlightable code
2228 $parts = array(
2229 0 => array(
2230 0 => '',
2231 1 => ''
2233 1 => array(
2234 0 => null,
2235 1 => $parts[0][1]
2238 $num_parts = 2;
2241 } else {
2242 // Not strict mode - simply dump the source into
2243 // the array at index 1 (the first highlightable block)
2244 $parts = array(
2245 0 => array(
2246 0 => '',
2247 1 => ''
2249 1 => array(
2250 0 => null,
2251 1 => $code
2254 $num_parts = 2;
2257 //Unset variables we won't need any longer
2258 unset($code);
2260 //Preload some repeatedly used values regarding hardquotes ...
2261 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2262 $hq_strlen = strlen($hq);
2264 //Preload if line numbers are to be generated afterwards
2265 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2266 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2267 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2269 //preload the escape char for faster checking ...
2270 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2272 // this is used for single-line comments
2273 $sc_disallowed_before = "";
2274 $sc_disallowed_after = "";
2276 if (isset($this->language_data['PARSER_CONTROL'])) {
2277 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2278 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2279 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2281 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2282 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2287 //Fix for SF#1932083: Multichar Quotemarks unsupported
2288 $is_string_starter = array();
2289 if ($this->lexic_permissions['STRINGS']) {
2290 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2291 if (!isset($is_string_starter[$quotemark[0]])) {
2292 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2293 } else if (is_string($is_string_starter[$quotemark[0]])) {
2294 $is_string_starter[$quotemark[0]] = array(
2295 $is_string_starter[$quotemark[0]],
2296 $quotemark);
2297 } else {
2298 $is_string_starter[$quotemark[0]][] = $quotemark;
2303 // Now we go through each part. We know that even-indexed parts are
2304 // code that shouldn't be highlighted, and odd-indexed parts should
2305 // be highlighted
2306 for ($key = 0; $key < $num_parts; ++$key) {
2307 $STRICTATTRS = '';
2309 // If this block should be highlighted...
2310 if (!($key & 1)) {
2311 // Else not a block to highlight
2312 $endresult .= $this->hsc($parts[$key][1]);
2313 unset($parts[$key]);
2314 continue;
2317 $result = '';
2318 $part = $parts[$key][1];
2320 $highlight_part = true;
2321 if ($this->strict_mode && !is_null($parts[$key][0])) {
2322 // get the class key for this block of code
2323 $script_key = $parts[$key][0];
2324 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2325 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2326 $this->lexic_permissions['SCRIPT']) {
2327 // Add a span element around the source to
2328 // highlight the overall source block
2329 if (!$this->use_classes &&
2330 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2331 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2332 } else {
2333 $attributes = ' class="sc' . $script_key . '"';
2335 $result .= "<span$attributes>";
2336 $STRICTATTRS = $attributes;
2340 if ($highlight_part) {
2341 // Now, highlight the code in this block. This code
2342 // is really the engine of GeSHi (along with the method
2343 // parse_non_string_part).
2345 // cache comment regexps incrementally
2346 $next_comment_regexp_key = '';
2347 $next_comment_regexp_pos = -1;
2348 $next_comment_multi_pos = -1;
2349 $next_comment_single_pos = -1;
2350 $comment_regexp_cache_per_key = array();
2351 $comment_multi_cache_per_key = array();
2352 $comment_single_cache_per_key = array();
2353 $next_open_comment_multi = '';
2354 $next_comment_single_key = '';
2355 $escape_regexp_cache_per_key = array();
2356 $next_escape_regexp_key = '';
2357 $next_escape_regexp_pos = -1;
2359 $length = strlen($part);
2360 for ($i = 0; $i < $length; ++$i) {
2361 // Get the next char
2362 $char = $part[$i];
2363 $char_len = 1;
2365 // update regexp comment cache if needed
2366 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2367 $next_comment_regexp_pos = $length;
2368 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2369 $match_i = false;
2370 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2371 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2372 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2373 // we have already matched something
2374 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2375 // this comment is never matched
2376 continue;
2378 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2379 } else if (
2380 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2381 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2382 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2384 $match_i = $match[0][1];
2385 if (GESHI_PHP_PRE_433) {
2386 $match_i += $i;
2389 $comment_regexp_cache_per_key[$comment_key] = array(
2390 'key' => $comment_key,
2391 'length' => strlen($match[0][0]),
2392 'pos' => $match_i
2394 } else {
2395 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2396 continue;
2399 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2400 $next_comment_regexp_pos = $match_i;
2401 $next_comment_regexp_key = $comment_key;
2402 if ($match_i === $i) {
2403 break;
2409 $string_started = false;
2411 if (isset($is_string_starter[$char])) {
2412 // Possibly the start of a new string ...
2414 //Check which starter it was ...
2415 //Fix for SF#1932083: Multichar Quotemarks unsupported
2416 if (is_array($is_string_starter[$char])) {
2417 $char_new = '';
2418 foreach ($is_string_starter[$char] as $testchar) {
2419 if ($testchar === substr($part, $i, strlen($testchar)) &&
2420 strlen($testchar) > strlen($char_new)) {
2421 $char_new = $testchar;
2422 $string_started = true;
2425 if ($string_started) {
2426 $char = $char_new;
2428 } else {
2429 $testchar = $is_string_starter[$char];
2430 if ($testchar === substr($part, $i, strlen($testchar))) {
2431 $char = $testchar;
2432 $string_started = true;
2435 $char_len = strlen($char);
2438 if ($string_started && ($i != $next_comment_regexp_pos)) {
2439 // Hand out the correct style information for this string
2440 $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2441 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2442 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2443 $string_key = 0;
2446 // parse the stuff before this
2447 $result .= $this->parse_non_string_part($stuff_to_parse);
2448 $stuff_to_parse = '';
2450 if (!$this->use_classes) {
2451 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2452 } else {
2453 $string_attributes = ' class="st'.$string_key.'"';
2456 // now handle the string
2457 $string = "<span$string_attributes>" . GeSHi::hsc($char);
2458 $start = $i + $char_len;
2459 $string_open = true;
2461 if(empty($this->language_data['ESCAPE_REGEXP'])) {
2462 $next_escape_regexp_pos = $length;
2465 do {
2466 //Get the regular ending pos ...
2467 $close_pos = strpos($part, $char, $start);
2468 if(false === $close_pos) {
2469 $close_pos = $length;
2472 if($this->lexic_permissions['ESCAPE_CHAR']) {
2473 // update escape regexp cache if needed
2474 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2475 $next_escape_regexp_pos = $length;
2476 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2477 $match_i = false;
2478 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2479 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2480 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2481 // we have already matched something
2482 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2483 // this comment is never matched
2484 continue;
2486 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2487 } else if (
2488 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2489 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2490 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2492 $match_i = $match[0][1];
2493 if (GESHI_PHP_PRE_433) {
2494 $match_i += $start;
2497 $escape_regexp_cache_per_key[$escape_key] = array(
2498 'key' => $escape_key,
2499 'length' => strlen($match[0][0]),
2500 'pos' => $match_i
2502 } else {
2503 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2504 continue;
2507 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2508 $next_escape_regexp_pos = $match_i;
2509 $next_escape_regexp_key = $escape_key;
2510 if ($match_i === $start) {
2511 break;
2517 //Find the next simple escape position
2518 if('' != $this->language_data['ESCAPE_CHAR']) {
2519 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2520 if(false === $simple_escape) {
2521 $simple_escape = $length;
2523 } else {
2524 $simple_escape = $length;
2526 } else {
2527 $next_escape_regexp_pos = $length;
2528 $simple_escape = $length;
2531 if($simple_escape < $next_escape_regexp_pos &&
2532 $simple_escape < $length &&
2533 $simple_escape < $close_pos) {
2534 //The nexxt escape sequence is a simple one ...
2535 $es_pos = $simple_escape;
2537 //Add the stuff not in the string yet ...
2538 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2540 //Get the style for this escaped char ...
2541 if (!$this->use_classes) {
2542 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2543 } else {
2544 $escape_char_attributes = ' class="es0"';
2547 //Add the style for the escape char ...
2548 $string .= "<span$escape_char_attributes>" .
2549 GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2551 //Get the byte AFTER the ESCAPE_CHAR we just found
2552 $es_char = $part[$es_pos + 1];
2553 if ($es_char == "\n") {
2554 // don't put a newline around newlines
2555 $string .= "</span>\n";
2556 $start = $es_pos + 2;
2557 } else if (ord($es_char) >= 128) {
2558 //This is an non-ASCII char (UTF8 or single byte)
2559 //This code tries to work around SF#2037598 ...
2560 if(function_exists('mb_substr')) {
2561 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2562 $string .= $es_char_m . '</span>';
2563 } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2564 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2565 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2566 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2567 "|\xED[\x80-\x9F][\x80-\xBF]".
2568 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2569 "|[\xF1-\xF3][\x80-\xBF]{3}".
2570 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2571 $part, $es_char_m, null, $es_pos + 1)) {
2572 $es_char_m = $es_char_m[0];
2573 } else {
2574 $es_char_m = $es_char;
2576 $string .= $this->hsc($es_char_m) . '</span>';
2577 } else {
2578 $es_char_m = $this->hsc($es_char);
2580 $start = $es_pos + strlen($es_char_m) + 1;
2581 } else {
2582 $string .= $this->hsc($es_char) . '</span>';
2583 $start = $es_pos + 2;
2585 } else if ($next_escape_regexp_pos < $length &&
2586 $next_escape_regexp_pos < $close_pos) {
2587 $es_pos = $next_escape_regexp_pos;
2588 //Add the stuff not in the string yet ...
2589 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2591 //Get the key and length of this match ...
2592 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2593 $escape_str = substr($part, $es_pos, $escape['length']);
2594 $escape_key = $escape['key'];
2596 //Get the style for this escaped char ...
2597 if (!$this->use_classes) {
2598 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2599 } else {
2600 $escape_char_attributes = ' class="es' . $escape_key . '"';
2603 //Add the style for the escape char ...
2604 $string .= "<span$escape_char_attributes>" .
2605 $this->hsc($escape_str) . '</span>';
2607 $start = $es_pos + $escape['length'];
2608 } else {
2609 //Copy the remainder of the string ...
2610 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2611 $start = $close_pos + $char_len;
2612 $string_open = false;
2614 } while($string_open);
2616 if ($check_linenumbers) {
2617 // Are line numbers used? If, we should end the string before
2618 // the newline and begin it again (so when <li>s are put in the source
2619 // remains XHTML compliant)
2620 // note to self: This opens up possibility of config files specifying
2621 // that languages can/cannot have multiline strings???
2622 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2625 $result .= $string;
2626 $string = '';
2627 $i = $start - 1;
2628 continue;
2629 } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2630 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2631 // The start of a hard quoted string
2632 if (!$this->use_classes) {
2633 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2634 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2635 } else {
2636 $string_attributes = ' class="st_h"';
2637 $escape_char_attributes = ' class="es_h"';
2639 // parse the stuff before this
2640 $result .= $this->parse_non_string_part($stuff_to_parse);
2641 $stuff_to_parse = '';
2643 // now handle the string
2644 $string = '';
2646 // look for closing quote
2647 $start = $i + $hq_strlen;
2648 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2649 $start = $close_pos + 1;
2650 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR']) {
2651 // make sure this quote is not escaped
2652 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2653 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2654 // check wether this quote is escaped or if it is something like '\\'
2655 $escape_char_pos = $close_pos - 1;
2656 while ($escape_char_pos > 0
2657 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2658 --$escape_char_pos;
2660 if (($close_pos - $escape_char_pos) & 1) {
2661 // uneven number of escape chars => this quote is escaped
2662 continue 2;
2668 // found closing quote
2669 break;
2672 //Found the closing delimiter?
2673 if (!$close_pos) {
2674 // span till the end of this $part when no closing delimiter is found
2675 $close_pos = $length;
2678 //Get the actual string
2679 $string = substr($part, $i, $close_pos - $i + 1);
2680 $i = $close_pos;
2682 // handle escape chars and encode html chars
2683 // (special because when we have escape chars within our string they may not be escaped)
2684 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2685 $start = 0;
2686 $new_string = '';
2687 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2688 // hmtl escape stuff before
2689 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2690 // check if this is a hard escape
2691 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2692 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2693 // indeed, this is a hardescape
2694 $new_string .= "<span$escape_char_attributes>" .
2695 $this->hsc($hardescape) . '</span>';
2696 $start = $es_pos + strlen($hardescape);
2697 continue 2;
2700 // not a hard escape, but a normal escape
2701 // they come in pairs of two
2702 $c = 0;
2703 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2704 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2705 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2706 $c += 2;
2708 if ($c) {
2709 $new_string .= "<span$escape_char_attributes>" .
2710 str_repeat($escaped_escape_char, $c) .
2711 '</span>';
2712 $start = $es_pos + $c;
2713 } else {
2714 // this is just a single lonely escape char...
2715 $new_string .= $escaped_escape_char;
2716 $start = $es_pos + 1;
2719 $string = $new_string . $this->hsc(substr($string, $start));
2720 } else {
2721 $string = $this->hsc($string);
2724 if ($check_linenumbers) {
2725 // Are line numbers used? If, we should end the string before
2726 // the newline and begin it again (so when <li>s are put in the source
2727 // remains XHTML compliant)
2728 // note to self: This opens up possibility of config files specifying
2729 // that languages can/cannot have multiline strings???
2730 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2733 $result .= "<span$string_attributes>" . $string . '</span>';
2734 $string = '';
2735 continue;
2736 } else {
2737 //Have a look for regexp comments
2738 if ($i == $next_comment_regexp_pos) {
2739 $COMMENT_MATCHED = true;
2740 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2741 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2743 //@todo If remove important do remove here
2744 if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2745 if (!$this->use_classes) {
2746 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2747 } else {
2748 $attributes = ' class="co' . $comment['key'] . '"';
2751 $test_str = "<span$attributes>" . $test_str . "</span>";
2753 // Short-cut through all the multiline code
2754 if ($check_linenumbers) {
2755 // strreplace to put close span and open span around multiline newlines
2756 $test_str = str_replace(
2757 "\n", "</span>\n<span$attributes>",
2758 str_replace("\n ", "\n&nbsp;", $test_str)
2763 $i += $comment['length'] - 1;
2765 // parse the rest
2766 $result .= $this->parse_non_string_part($stuff_to_parse);
2767 $stuff_to_parse = '';
2770 // If we haven't matched a regexp comment, try multi-line comments
2771 if (!$COMMENT_MATCHED) {
2772 // Is this a multiline comment?
2773 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2774 $next_comment_multi_pos = $length;
2775 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2776 $match_i = false;
2777 if (isset($comment_multi_cache_per_key[$open]) &&
2778 ($comment_multi_cache_per_key[$open] >= $i ||
2779 $comment_multi_cache_per_key[$open] === false)) {
2780 // we have already matched something
2781 if ($comment_multi_cache_per_key[$open] === false) {
2782 // this comment is never matched
2783 continue;
2785 $match_i = $comment_multi_cache_per_key[$open];
2786 } else if (($match_i = stripos($part, $open, $i)) !== false) {
2787 $comment_multi_cache_per_key[$open] = $match_i;
2788 } else {
2789 $comment_multi_cache_per_key[$open] = false;
2790 continue;
2792 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2793 $next_comment_multi_pos = $match_i;
2794 $next_open_comment_multi = $open;
2795 if ($match_i === $i) {
2796 break;
2801 if ($i == $next_comment_multi_pos) {
2802 $open = $next_open_comment_multi;
2803 $close = $this->language_data['COMMENT_MULTI'][$open];
2804 $open_strlen = strlen($open);
2805 $close_strlen = strlen($close);
2806 $COMMENT_MATCHED = true;
2807 $test_str_match = $open;
2808 //@todo If remove important do remove here
2809 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2810 $open == GESHI_START_IMPORTANT) {
2811 if ($open != GESHI_START_IMPORTANT) {
2812 if (!$this->use_classes) {
2813 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2814 } else {
2815 $attributes = ' class="coMULTI"';
2817 $test_str = "<span$attributes>" . $this->hsc($open);
2818 } else {
2819 if (!$this->use_classes) {
2820 $attributes = ' style="' . $this->important_styles . '"';
2821 } else {
2822 $attributes = ' class="imp"';
2825 // We don't include the start of the comment if it's an
2826 // "important" part
2827 $test_str = "<span$attributes>";
2829 } else {
2830 $test_str = $this->hsc($open);
2833 $close_pos = strpos( $part, $close, $i + $open_strlen );
2835 if ($close_pos === false) {
2836 $close_pos = $length;
2839 // Short-cut through all the multiline code
2840 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2841 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2842 $test_str_match == GESHI_START_IMPORTANT) &&
2843 $check_linenumbers) {
2845 // strreplace to put close span and open span around multiline newlines
2846 $test_str .= str_replace(
2847 "\n", "</span>\n<span$attributes>",
2848 str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2850 } else {
2851 $test_str .= $rest_of_comment;
2854 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2855 $test_str_match == GESHI_START_IMPORTANT) {
2856 $test_str .= '</span>';
2859 $i = $close_pos + $close_strlen - 1;
2861 // parse the rest
2862 $result .= $this->parse_non_string_part($stuff_to_parse);
2863 $stuff_to_parse = '';
2867 // If we haven't matched a multiline comment, try single-line comments
2868 if (!$COMMENT_MATCHED) {
2869 // cache potential single line comment occurances
2870 if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2871 $next_comment_single_pos = $length;
2872 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2873 $match_i = false;
2874 if (isset($comment_single_cache_per_key[$comment_key]) &&
2875 ($comment_single_cache_per_key[$comment_key] >= $i ||
2876 $comment_single_cache_per_key[$comment_key] === false)) {
2877 // we have already matched something
2878 if ($comment_single_cache_per_key[$comment_key] === false) {
2879 // this comment is never matched
2880 continue;
2882 $match_i = $comment_single_cache_per_key[$comment_key];
2883 } else if (
2884 // case sensitive comments
2885 ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2886 ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2887 // non case sensitive
2888 (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2889 (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2890 $comment_single_cache_per_key[$comment_key] = $match_i;
2891 } else {
2892 $comment_single_cache_per_key[$comment_key] = false;
2893 continue;
2895 if ($match_i !== false && $match_i < $next_comment_single_pos) {
2896 $next_comment_single_pos = $match_i;
2897 $next_comment_single_key = $comment_key;
2898 if ($match_i === $i) {
2899 break;
2904 if ($next_comment_single_pos == $i) {
2905 $comment_key = $next_comment_single_key;
2906 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2907 $com_len = strlen($comment_mark);
2909 // This check will find special variables like $# in bash
2910 // or compiler directives of Delphi beginning {$
2911 if ((empty($sc_disallowed_before) || ($i == 0) ||
2912 (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2913 (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
2914 (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2916 // this is a valid comment
2917 $COMMENT_MATCHED = true;
2918 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2919 if (!$this->use_classes) {
2920 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2921 } else {
2922 $attributes = ' class="co' . $comment_key . '"';
2924 $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2925 } else {
2926 $test_str = $this->hsc($comment_mark);
2929 //Check if this comment is the last in the source
2930 $close_pos = strpos($part, "\n", $i);
2931 $oops = false;
2932 if ($close_pos === false) {
2933 $close_pos = $length;
2934 $oops = true;
2936 $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2937 if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2938 $test_str .= "</span>";
2941 // Take into account that the comment might be the last in the source
2942 if (!$oops) {
2943 $test_str .= "\n";
2946 $i = $close_pos;
2948 // parse the rest
2949 $result .= $this->parse_non_string_part($stuff_to_parse);
2950 $stuff_to_parse = '';
2956 // Where are we adding this char?
2957 if (!$COMMENT_MATCHED) {
2958 $stuff_to_parse .= $char;
2959 } else {
2960 $result .= $test_str;
2961 unset($test_str);
2962 $COMMENT_MATCHED = false;
2965 // Parse the last bit
2966 $result .= $this->parse_non_string_part($stuff_to_parse);
2967 $stuff_to_parse = '';
2968 } else {
2969 $result .= $this->hsc($part);
2971 // Close the <span> that surrounds the block
2972 if ($STRICTATTRS != '') {
2973 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2974 $result .= '</span>';
2977 $endresult .= $result;
2978 unset($part, $parts[$key], $result);
2981 //This fix is related to SF#1923020, but has to be applied regardless of
2982 //actually highlighting symbols.
2983 /** NOTE: memorypeak #3 */
2984 $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2986 // // Parse the last stuff (redundant?)
2987 // $result .= $this->parse_non_string_part($stuff_to_parse);
2989 // Lop off the very first and last spaces
2990 // $result = substr($result, 1, -1);
2992 // We're finished: stop timing
2993 $this->set_time($start_time, microtime());
2995 $this->finalise($endresult);
2996 return $endresult;
/**
 * Swaps out spaces and tabs for HTML indentation. Not needed if
 * the code is in a pre block...
 *
 * Tabs are expanded to the next tab stop, measured on the visible text only
 * (markup between '<' and '>' is copied through without advancing the column).
 * Afterwards a leading space on each line and every run of two spaces are
 * rewritten with &nbsp; so browsers do not collapse them. When line numbers
 * are disabled, newlines are finally replaced by <br /> (nl2br) or by the
 * configured line ending.
 *
 * @param string The source to indent (reference!)
 * @since 1.0.0
 * @access private
 */
function indent(&$result) {
    /// Replace tabs with the correct number of spaces
    if (false !== strpos($result, "\t")) {
        $lines = explode("\n", $result);
        $result = null;//Save memory while we process the lines individually
        $tab_width = $this->get_real_tab_width();
        // Template the tab replacements are cut from: one &nbsp; (6 bytes)
        // followed by $tab_width plain spaces.
        $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);

        for ($key = 0, $n = count($lines); $key < $n; $key++) {
            $line = $lines[$key];
            if (false === strpos($line, "\t")) {
                continue;
            }

            $pos = 0;
            $length = strlen($line);
            $lines[$key] = ''; // reduce memory

            $IN_TAG = false;
            for ($i = 0; $i < $length; ++$i) {
                $char = $line[$i];
                // Simple engine to work out whether we're in a tag.
                // If we are we modify $pos. This is so we ignore HTML
                // in the line and only workout the tab replacement
                // via the actual content of the string
                // This test could be improved to include strings in the
                // html so that < or > would be allowed in user's styles
                // (e.g. quotes: '<' '>'; or similar)
                if ($IN_TAG) {
                    if ('>' == $char) {
                        $IN_TAG = false;
                    }
                    $lines[$key] .= $char;
                } else if ('<' == $char) {
                    $IN_TAG = true;
                    $lines[$key] .= '<';
                } else if ('&' == $char) {
                    // Start of an HTML entity: look for its ';' in a 5-byte
                    // window starting 3 bytes ahead. If found, pre-subtract
                    // from the column so that, once the remaining entity
                    // bytes have each advanced $pos by one, the whole entity
                    // has counted as a single visible character.
                    $substr = substr($line, $i + 3, 5);
                    $posi = strpos($substr, ';');
                    if (false === $posi) {
                        ++$pos;
                    } else {
                        $pos -= $posi+2;
                    }
                    $lines[$key] .= $char;
                } else if ("\t" == $char) {
                    $str = '';
                    // OPTIMISE - move $strs out. Make an array:
                    // $tabs = array(
                    //  1 => '&nbsp;',
                    //  2 => '&nbsp; ',
                    //  3 => '&nbsp; &nbsp;' etc etc
                    // to use instead of building a string every time
                    $tab_end_width = $tab_width - ($pos % $tab_width);
                    if (($pos & 1) || 1 == $tab_end_width) {
                        // plain spaces only (skip the leading &nbsp;)
                        $str .= substr($tab_string, 6, $tab_end_width);
                    } else {
                        // &nbsp; followed by ($tab_end_width - 1) spaces
                        $str .= substr($tab_string, 0, $tab_end_width+5);
                    }
                    $lines[$key] .= $str;
                    $pos += $tab_end_width;

                    // No further tabs on this line: copy the rest verbatim
                    if (false === strpos($line, "\t", $i + 1)) {
                        $lines[$key] .= substr($line, $i + 1);
                        break;
                    }
                } else if (0 == $pos && ' ' == $char) {
                    $lines[$key] .= '&nbsp;';
                    ++$pos;
                } else {
                    $lines[$key] .= $char;
                    ++$pos;
                }
            }
        }
        $result = implode("\n", $lines);
        unset($lines);//We don't need the lines separated beyond this --- free them!
    }

    // Other whitespace
    // BenBE: Fix to reduce the number of replacements to be done
    $result = preg_replace('/^ /m', '&nbsp;', $result);
    // Only runs of TWO spaces are rewritten. Replacing every single space
    // would also hit the spaces inside the generated <span ...> tags and
    // break the markup.
    $result = str_replace('  ', ' &nbsp;', $result);

    if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
        if ($this->line_ending === null) {
            $result = nl2br($result);
        } else {
            $result = str_replace("\n", $this->line_ending, $result);
        }
    }
}
/**
 * Applies the case policy of the loaded language (its CASE_KEYWORDS
 * setting) to a keyword: upper-case, lower-case or left untouched.
 *
 * @param  string The keyword to change the case of
 * @return string The keyword with its case changed
 * @since  1.0.0
 * @access private
 */
function change_case($instr) {
    $case_policy = $this->language_data['CASE_KEYWORDS'];

    if ($case_policy == GESHI_CAPS_UPPER) {
        return strtoupper($instr);
    }
    if ($case_policy == GESHI_CAPS_LOWER) {
        return strtolower($instr);
    }

    // Any other setting: keep the keyword exactly as written
    return $instr;
}
/**
 * Handles replacements of keywords to include markup and links if requested
 *
 * Used as a preg_replace_callback handler by parse_non_string_part(); the
 * keyword group being processed is read from $this->_kw_replace_group.
 * The keyword is wrapped in a "<|/GROUP/>...|>" placeholder and, when keyword
 * links are enabled and the group has a non-empty URL, additionally in a
 * "<|UR1|"URL">...</a>" placeholder.
 *
 * @param  array  The match array; element 0 is the matched keyword
 * @return string The placeholder markup for the match found
 * @since  1.0.8
 * @access private
 *
 * @todo   Get rid of ender in keyword links
 */
function handle_keyword_replace($match) {
    $k = $this->_kw_replace_group;
    $keyword = $match[0];

    $before = '';
    $after = '';

    if ($this->keyword_links) {
        // Keyword links have been enabled

        if (isset($this->language_data['URLS'][$k]) &&
            $this->language_data['URLS'][$k] != '') {
            // There is a base group for this keyword

            // Default to the text as matched. For case-insensitive groups
            // whose URL uses {FNAME}, prefer the spelling from the language
            // file so the link gets the canonical case. Falling back to the
            // matched text (instead of whatever the loop variable held last)
            // keeps the link sane if no list entry compares equal.
            $word = $keyword;
            if (!$this->language_data['CASE_SENSITIVE'][$k] &&
                strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
                foreach ($this->language_data['KEYWORDS'][$k] as $candidate) {
                    if (strcasecmp($candidate, $keyword) == 0) {
                        $word = $candidate;
                        break;
                    }
                }
            }

            // Dots in the URL are masked as <DOT> here; parse_non_string_part()
            // turns them back into '.' near the end of its run.
            $before = '<|UR1|"' .
                str_replace(
                    array(
                        '{FNAME}',
                        '{FNAMEL}',
                        '{FNAMEU}',
                        '.'),
                    array(
                        str_replace('+', '%20', urlencode($this->hsc($word))),
                        str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
                        str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
                        '<DOT>'),
                    $this->language_data['URLS'][$k]
                ) . '">';
            $after = '</a>';
        }
    }

    return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
}
/**
 * Builds the style attribute for a regexp match whose style is defined as a
 * callback function rather than a fixed string.
 *
 * @note this is a callback, don't use it directly
 *
 * @param  array  the matches array; element 1 is the highlighted text
 * @return string The highlighted string
 * @since  1.0.8
 * @access private
 */
function handle_regexps_callback($matches) {
    // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
    $matched_text = $matches[1];
    $style_callback = $this->language_data['STYLES']['REGEXPS'][$this->_rx_key];
    $computed_style = call_user_func($style_callback, $matched_text);

    return ' style="' . $computed_style . '"' . $matched_text . '|>';
}
/**
 * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
 *
 * The matched (or replaced) text is wrapped in "<|!REG3XPkey!>...|>"
 * placeholders; at every newline the placeholder is closed and reopened so
 * that no placeholder pair spans more than one line.
 *
 * @note this is a callback, don't use it directly
 *
 * @param  array the matches array
 * @return string
 * @since  1.0.8
 * @access private
 */
function handle_multiline_regexps($matches) {
    $before = $this->_hmr_before;
    $after = $this->_hmr_after;
    if ($this->_hmr_replace) {
        // Map each back-reference ("\0", "\1", ...) to its captured text.
        // strtr() with a map tries the longest key first and never rescans
        // text it has already substituted, so "\10" is not mistaken for
        // "\1" followed by "0" and captured text containing "\N" is left alone.
        $backrefs = array();
        foreach ($matches as $k => $captured) {
            $backrefs['\\' . $k] = $captured;
        }

        $before = strtr($before, $backrefs);
        $after = strtr($after, $backrefs);
        $replace = strtr($this->_hmr_replace, $backrefs);
    } else {
        $replace = $matches[0];
    }
    return $before
                . '<|!REG3XP' . $this->_hmr_key .'!>'
                    . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
                . '|>'
          . $after;
}
3227 * Takes a string that has no strings or comments in it, and highlights
3228 * stuff like keywords, numbers and methods.
3230 * @param string The string to parse for keyword, numbers etc.
3231 * @since 1.0.0
3232 * @access private
3233 * @todo BUGGY! Why? Why not build string and return?
3235 function parse_non_string_part($stuff_to_parse) {
// Overview: the text is HTML-escaped once, then each highlighting pass
// (keywords, regexps, numbers, object members, brackets, symbols) wraps its
// matches in "<| ... |>" placeholders. Only at the very end are the
// placeholders turned into real <span ...> / </span> tags, and the
// highlighted HTML is returned.
//
// A single space is prepended here; it is removed again by the
// substr(..., 1) in the return statement.
3236 $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3238 // Highlight keywords
// Default lookbehind / lookahead fragments. They are left open here and
// closed with "])" below, after the quotemarks have been appended.
3239 $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3240 $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3241 if ($this->lexic_permissions['STRINGS']) {
3242 $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3243 $disallowed_before .= $quotemarks;
3244 $disallowed_after .= $quotemarks;
3246 $disallowed_before .= "])";
3247 $disallowed_after .= "])";
// Language-wide PARSER_CONTROL settings replace the defaults built above.
// Any entries in PARSER_CONTROL['KEYWORDS'] other than the two global keys
// switch on the per-group lookup done inside the keyword loop.
3249 $parser_control_pergroup = false;
3250 if (isset($this->language_data['PARSER_CONTROL'])) {
3251 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3252 $x = 0; // check whether per-keyword-group parser_control is enabled
3253 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3254 $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3255 ++$x;
3257 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3258 $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3259 ++$x;
3261 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
// A keyword group is processed unless its permission is explicitly false.
3265 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3266 if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3267 $this->lexic_permissions['KEYWORDS'][$k]) {
3269 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3270 $modifiers = $case_sensitive ? '' : 'i';
3272 // NEW in 1.0.8 - per-keyword-group parser control
3273 $disallowed_before_local = $disallowed_before;
3274 $disallowed_after_local = $disallowed_after;
3275 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3276 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3277 $disallowed_before_local =
3278 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3281 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3282 $disallowed_after_local =
3283 $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
// handle_keyword_replace() reads the current group from this member.
3287 $this->_kw_replace_group = $k;
3289 //NEW in 1.0.8, the cached regexp list
3290 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3291 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set < $set_length; ++$set) {
3292 $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3293 // Might make a more unique string for putting the number in soon
3294 // Basically, we don't put the styles in yet because then the styles themselves will
3295 // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3296 $stuff_to_parse = preg_replace_callback(
3297 "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3298 array($this, 'handle_keyword_replace'),
3299 $stuff_to_parse
3305 // Regular expressions
// Array entries supply search / replace / modifiers / before / after parts;
// plain string entries are wrapped in one capture group. With line numbers
// enabled the match goes through handle_multiline_regexps(), which closes
// and reopens the placeholder at every newline.
3306 foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3307 if ($this->lexic_permissions['REGEXPS'][$key]) {
3308 if (is_array($regexp)) {
3309 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3310 // produce valid HTML when we match multiple lines
3311 $this->_hmr_replace = $regexp[GESHI_REPLACE];
3312 $this->_hmr_before = $regexp[GESHI_BEFORE];
3313 $this->_hmr_key = $key;
3314 $this->_hmr_after = $regexp[GESHI_AFTER];
3315 $stuff_to_parse = preg_replace_callback(
3316 "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3317 array($this, 'handle_multiline_regexps'),
3318 $stuff_to_parse);
3319 $this->_hmr_replace = false;
3320 $this->_hmr_before = '';
3321 $this->_hmr_after = '';
3322 } else {
3323 $stuff_to_parse = preg_replace(
3324 '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3325 $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3326 $stuff_to_parse);
3328 } else {
3329 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3330 // produce valid HTML when we match multiple lines
3331 $this->_hmr_key = $key;
3332 $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3333 array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3334 $this->_hmr_key = '';
3335 } else {
3336 $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3342 // Highlight numbers. As of 1.0.8 we support different types of numbers
// The cheap \d test skips the whole number pass when the text has no digit.
3343 $numbers_found = false;
3344 if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3345 $numbers_found = true;
3347 //For each of the formats ...
3348 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3349 //Check if it should be highlighted ...
3350 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3355 // Now that's all done, replace /[number]/ with the correct styles
// i.e. the "<|/k/>" markers emitted by handle_keyword_replace() become
// "<| style=...>" or "<| class=...>".
3357 foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3358 if (!$this->use_classes) {
3359 $attributes = ' style="' .
3360 (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3361 $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3362 } else {
3363 $attributes = ' class="kw' . $k . '"';
3365 $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3368 if ($numbers_found) {
3369 // Put number styles in
3370 foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3371 //Commented out for now, as this needs some review ...
3372 // if ($numbers_permissions & $id) {
3373 //Get the appropriate style ...
3374 //Checking for unset styles is done by the style cache builder ...
3375 if (!$this->use_classes) {
3376 $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3377 } else {
3378 $attributes = ' class="nu'.$id.'"';
3381 //Set in the correct styles ...
3382 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3383 // }
3387 // Highlight methods and fields in objects
// Defaults for the pattern parts; each can be overridden through
// PARSER_CONTROL['OOLANG'] (MATCH_BEFORE / MATCH_AFTER / MATCH_SPACES).
3388 if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3389 $oolang_spaces = "[\s]*";
3390 $oolang_before = "";
3391 $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3392 if (isset($this->language_data['PARSER_CONTROL'])) {
3393 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3394 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3395 $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3397 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3398 $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3400 if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3401 $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
// Only the identifier after the splitter (group 4) is wrapped; the prefix,
// the splitter itself and the whitespace are re-emitted unchanged.
3406 foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3407 if (false !== strpos($stuff_to_parse, $splitter)) {
3408 if (!$this->use_classes) {
3409 $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3410 } else {
3411 $attributes = ' class="me' . $key . '"';
3413 $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3419 // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3420 // You try it, and see what happens ;)
3421 // TODO: Fix lexic permissions not converting entities if shouldn't
3422 // be highlighting regardless
3424 if ($this->lexic_permissions['BRACKETS']) {
3425 $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3426 $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3430 //FIX for symbol highlighting ...
3431 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3432 //Get all matches and throw away those within a block that is already highlighted... (i.e. matched by a regexp)
// The pattern matches, in order of preference: a complete "<|...>...|>"
// placeholder block, a closing </a>, or a run of symbols. Offsets are
// captured so replacements can be spliced in by position.
3433 $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
// Running difference between original offsets and offsets in the string
// as modified by earlier replacements.
3434 $global_offset = 0;
3435 for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3436 $symbol_match = $pot_symbols[$s_id][0][0];
3437 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3438 // already highlighted blocks _must_ include either < or >
3439 // so if this conditional applies, we have to skip this match
3440 // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3441 if(strpos($symbol_match, '<SEMI>') === false &&
3442 strpos($symbol_match, '<PIPE>') === false) {
3443 continue;
3447 // if we reach this point, we have a valid match which needs to be highlighted
3449 $symbol_length = strlen($symbol_match);
3450 $symbol_offset = $pot_symbols[$s_id][0][1];
3451 unset($pot_symbols[$s_id]);
3452 $symbol_end = $symbol_length + $symbol_offset;
3453 $symbol_hl = "";
3455 // if we have multiple styles, we have to handle them properly
3456 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3457 $old_sym = -1;
3458 // Split the current stuff to replace into its atomic symbols ...
3459 preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3460 foreach ($sym_match_syms[0] as $sym_ms) {
3461 //Check if consecutive symbols belong to the same group to save output ...
3462 if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3463 && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3464 if (-1 != $old_sym) {
3465 $symbol_hl .= "|>";
3467 $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3468 if (!$this->use_classes) {
3469 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3470 } else {
3471 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3474 $symbol_hl .= $sym_ms;
3476 unset($sym_match_syms);
3478 //Close remaining tags and insert the replacement at the right position ...
3479 //Take caution if symbol_hl is empty to avoid doubled closing spans.
3480 if (-1 != $old_sym) {
3481 $symbol_hl .= "|>";
3483 } else {
3484 if (!$this->use_classes) {
3485 $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3486 } else {
3487 $symbol_hl = '<| class="sy0">';
3489 $symbol_hl .= $symbol_match . '|>';
3492 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3494 // since we replace old text with something of different size,
3495 // we'll have to keep track of the differences
3496 $global_offset += strlen($symbol_hl) - $symbol_length;
3499 //FIX for symbol highlighting ...
3501 // Add class/style for regexps
// A callable style is resolved per match by handle_regexps_callback();
// otherwise the "!REG3XPkey!" marker is swapped for a fixed attribute.
3502 foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3503 if ($this->lexic_permissions['REGEXPS'][$key]) {
3504 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3505 $this->_rx_key = $key;
3506 $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3507 array($this, 'handle_regexps_callback'),
3508 $stuff_to_parse);
3509 } else {
3510 if (!$this->use_classes) {
3511 $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3512 } else {
3513 if (is_array($this->language_data['REGEXPS'][$key]) &&
3514 array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3515 $attributes = ' class="' .
3516 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3517 } else {
3518 $attributes = ' class="re' . $key . '"';
3521 $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3526 // Replace <DOT> with . for urls
3527 $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3528 // Replace <|UR1| with <a href= for urls also
3529 if (isset($this->link_styles[GESHI_LINK])) {
3530 if ($this->use_classes) {
3531 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3532 } else {
3533 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3535 } else {
3536 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3540 // NOW we add the span thingy ;)
3543 $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3544 $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
// Drop the space that was prepended at the top of the function.
3545 return substr($stuff_to_parse, 1);
/**
 * Sets the time taken to parse the code
 *
 * Both arguments are expected in the "msec sec" string form returned by
 * microtime() without arguments: element 0 is the fractional part,
 * element 1 the whole seconds.
 *
 * @param microtime The time when parsing started
 * @param microtime The time when parsing ended
 * @since 1.0.2
 * @access private
 */
function set_time($start_time, $end_time) {
    $start = explode(' ', $start_time);
    $end = explode(' ', $end_time);
    // Subtract whole seconds and fractions separately. Adding the fraction to
    // the (very large) epoch seconds first would round away part of the
    // sub-second resolution before the difference is taken.
    $this->time = ($end[1] - $start[1]) + ($end[0] - $start[0]);
}
/**
 * Returns the duration recorded by set_time() for the last parse.
 *
 * @return double The time taken to parse the code
 * @since  1.0.2
 */
function get_time() {
    $elapsed = $this->time;
    return $elapsed;
}
/**
 * Recursively merges any number of arrays. Values from later arguments
 * overwrite values with the same key from earlier ones; where both sides
 * hold a value under a key and the later one is an array, the two are
 * merged recursively.
 *
 * If any argument is not an array, an E_USER_WARNING is raised and false is
 * returned. This also applies to recursive calls, so a scalar that clashes
 * with an array ends up as false under that key.
 *
 * @since 1.0.8
 * @access private
 */
function merge_arrays() {
    $arrays = func_get_args();

    // Validate all arguments up front; positions are reported 1-based.
    // comment out if more performance is necessary (in this case the foreach
    // loop below will trigger a warning if the argument is not an array)
    foreach ($arrays as $position => $candidate) {
        if (!is_array($candidate)) {
            // also array_merge_recursive returns nothing in this case
            trigger_error('Argument #' . ($position+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
            return false;
        }
    }

    // The first array is the base every later array is layered onto
    $ret = $arrays[0];

    foreach (array_slice($arrays, 1) as $overlay) {
        foreach ($overlay as $key => $value) {
            if (!is_array($value) || !isset($ret[$key])) {
                // plain overwrite / new key
                $ret[$key] = $value;
                continue;
            }
            // Both sides present and the new value is an array: recurse.
            // If $ret[$key] is a scalar the recursive call warns and yields false.
            $ret[$key] = $this->merge_arrays($ret[$key], $value);
        }
    }

    return $ret;
}
/**
 * Loads a language definition file and prepares the highlighter state
 * (language data, strict mode, lexic permissions, optional style overrides)
 * for it. Does nothing if the same file is already loaded.
 *
 * The language file is expected to fill the local variable $language_data;
 * an optional sibling "<name>.style.php" file may define $style_data.
 *
 * @param string The filename of the language file you want to load
 * @since 1.0.0
 * @access private
 * @todo Needs to load keys for lexic permissions for keywords, regexps etc
 */
function load_language($file_name) {
    // Same file as last time: keep the current state
    if ($file_name == $this->loaded_language) {
        return;
    }

    // Reset state that depends on the language before loading the new one
    $this->loaded_language = $file_name;
    $this->parse_cache_built = false;
    $this->enable_highlighting();
    $language_data = array();

    // The language file populates $language_data in this scope
    require $file_name;

    // No validation of $language_data is performed (yet)
    $this->language_data = $language_data;

    // Strict mode follows the language's own setting
    $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];

    // Default permissions: keyword groups are on when they contain keywords,
    // single-line comment groups and regexps are always on
    foreach ($this->language_data['KEYWORDS'] as $group => $words) {
        $this->lexic_permissions['KEYWORDS'][$group] = !empty($words);
    }
    foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $group) {
        $this->lexic_permissions['COMMENTS'][$group] = true;
    }
    foreach (array_keys($this->language_data['REGEXPS']) as $group) {
        $this->lexic_permissions['REGEXPS'][$group] = true;
    }

    // empty() is safe here even when PARSER_CONTROL is missing, null or
    // false: it neither raises a notice nor enters the block
    if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
        foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
            // Anything other than GESHI_NEVER counts as "enabled"
            $perm = $value !== GESHI_NEVER;

            if ($flag == 'ALL') {
                $this->enable_highlighting($perm);
            } elseif (isset($this->lexic_permissions[$flag])) {
                if (is_array($this->lexic_permissions[$flag])) {
                    // grouped permission: apply to every group
                    foreach (array_keys($this->lexic_permissions[$flag]) as $group) {
                        $this->lexic_permissions[$flag][$group] = $perm;
                    }
                } else {
                    $this->lexic_permissions[$flag] = $perm;
                }
            }
            // flags without a matching lexic permission are ignored
        }
        unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
    }

    //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
    //You need to set one for HARDESCAPES only in this case.
    if (!isset($this->language_data['HARDCHAR'])) {
        $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
    }

    //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
    // (the last four characters of the file name, presumably ".php", are
    // replaced by ".style.php")
    $style_filename = substr($file_name, 0, -4) . '.style.php';
    if (is_readable($style_filename)) {
        // Drop any $style_data left in scope (e.g. by the language file);
        // unset() on an undefined variable is a no-op
        unset($style_data);

        //Read the Style Information from the style file
        include $style_filename;

        //Apply the new styles to our current language styles
        if (isset($style_data) && is_array($style_data)) {
            $this->language_data['STYLES'] =
                $this->merge_arrays($this->language_data['STYLES'], $style_data);
        }
    }
}
/**
 * Takes the parsed code and various options, and creates the HTML
 * surrounding it to make it look nice.
 *
 * Steps: strip dangling end-of-important markers, apply HTML indentation
 * unless a <pre>-style header is used, drop spans that wrap only whitespace,
 * make sure an overall id exists when line ids are requested, then emit the
 * header, every code line with its line markup, and the footer. The result
 * is written back into the referenced argument.
 *
 * @param string The code already parsed (reference!)
 * @since 1.0.0
 * @access private
 */
function finalise(&$parsed_code) {
    // Remove end parts of important declarations
    // This is BUGGY!! My fault for bad code: fix coming in 1.2
    // @todo Remove this crap
    if ($this->enable_important_blocks &&
        (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
        $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
    }

    // Add HTML whitespace stuff if we're using the <div> header
    if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
        $this->indent($parsed_code);
    }

    // purge some unnecessary stuff
    /** NOTE: memorypeak #1 */
    $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);

    // If we are using IDs for line numbers, there needs to be an overall
    // ID set to prevent collisions.
    if ($this->add_ids && !$this->overall_id) {
        $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
    }

    // Get code into lines
    /** NOTE: memorypeak #2 */
    $code = explode("\n", $parsed_code);
    $parsed_code = $this->header();

    // If we're using line numbers, we insert <li>s and appropriate
    // markup to style them (otherwise we don't need to do anything)
    if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
        // If we're using the <pre> header, we shouldn't add newlines because
        // the <pre> will line-break them (and the <li>s already do this for us)
        $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';

        // Foreach line... ($i is advanced inside the body, see ++$i below)
        for ($i = 0, $n = count($code); $i < $n;) {
            //Reset the attributes for a new line ...
            $attrs = array();

            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }

            // If this is a "special line"...
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $attrs['class'][] = 'li2';
                    $def_attr = ' class="de2"';
                } else {
                    $attrs['style'][] = $this->line_style2;
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            } else {
                if ($this->use_classes) {
                    $attrs['class'][] = 'li1';
                    $def_attr = ' class="de1"';
                } else {
                    $attrs['style'][] = $this->line_style1;
                    $def_attr = ' style="' . $this->code_style . '"';
                }
            }

            //Check which type of tag to insert for this line
            if ($this->header_type == GESHI_HEADER_PRE_VALID) {
                $start = "<pre$def_attr>";
                $end = '</pre>';
            } else {
                // Span or div?
                $start = "<div$def_attr>";
                $end = '</div>';
            }

            // From here on $i is the 1-based number of the current line
            ++$i;

            // Are we supposed to use ids? If so, add them
            if ($this->add_ids) {
                $attrs['id'][] = "$this->overall_id-$i";
            }

            //Is this some line with extra styles???
            if (in_array($i, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$i])) {
                        $attrs['class'][] = "lx$i";
                    } else {
                        $attrs['class'][] = "ln-xtra";
                    }
                } else {
                    array_push($attrs['style'], $this->get_line_style($i));
                }
            }

            // Add in the line surrounded by appropriate list HTML
            $attr_string = '';
            foreach ($attrs as $key => $attr) {
                $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
            }

            $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
            unset($code[$i - 1]);
        }
    } else {
        $n = count($code);
        if ($this->use_classes) {
            $attributes = ' class="de1"';
        } else {
            $attributes = ' style="'. $this->code_style .'"';
        }
        if ($this->header_type == GESHI_HEADER_PRE_VALID) {
            $parsed_code .= '<pre'. $attributes .'>';
        } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
            if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
                if ($this->use_classes) {
                    $attrs = ' class="ln"';
                } else {
                    $attrs = ' style="'. $this->table_linenumber_style .'"';
                }
                $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
                // get linenumbers
                // we don't merge it with the for below, since it should be better for
                // memory consumption this way
                // @todo: but... actually it would still be somewhat nice to merge the two loops
                //        the mem peaks are at different positions
                for ($i = 0; $i < $n; ++$i) {
                    // 1-based line number. Extra-line membership, the "lx"
                    // class and the per-line style must all use this same
                    // number, as the <li> branch above does.
                    $line_no = $i + 1;
                    $close = 0;
                    // fancy lines
                    if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                        $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                        // Set the attributes to style the line
                        if ($this->use_classes) {
                            $parsed_code .= '<span class="xtra li2"><span class="de2">';
                        } else {
                            // This style "covers up" the special styles set for special lines
                            // so that styles applied to special lines don't apply to the actual
                            // code on that line
                            $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                              .'<span style="' . $this->code_style .'">';
                        }
                        $close += 2;
                    }
                    //Is this some line with extra styles???
                    if (in_array($line_no, $this->highlight_extra_lines)) {
                        if ($this->use_classes) {
                            if (isset($this->highlight_extra_lines_styles[$line_no])) {
                                $parsed_code .= "<span class=\"xtra lx$line_no\">";
                            } else {
                                $parsed_code .= "<span class=\"xtra ln-xtra\">";
                            }
                        } else {
                            $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                        }
                        ++$close;
                    }
                    $parsed_code .= $this->line_numbers_start + $i;
                    if ($close) {
                        $parsed_code .= str_repeat('</span>', $close);
                    } else if ($i != $n) {
                        // NOTE(review): always true inside this loop, so a
                        // newline also follows the last number - left as is.
                        $parsed_code .= "\n";
                    }
                }
                $parsed_code .= '</pre></td><td'.$attributes.'>';
            }
            $parsed_code .= '<pre'. $attributes .'>';
        }
        // No line numbers, but still need to handle highlighting lines extra.
        // Have to use divs so the full width of the code is highlighted
        $close = 0;
        for ($i = 0; $i < $n; ++$i) {
            // 1-based line number, see the line-number loop above
            $line_no = $i + 1;
            // Make lines have at least one space in them if they're empty
            // BenBE: Checking emptiness using trim instead of relying on blanks
            if ('' == trim($code[$i])) {
                $code[$i] = '&nbsp;';
            }
            // fancy lines
            if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
                $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
                // Set the attributes to style the line
                if ($this->use_classes) {
                    $parsed_code .= '<span class="xtra li2"><span class="de2">';
                } else {
                    // This style "covers up" the special styles set for special lines
                    // so that styles applied to special lines don't apply to the actual
                    // code on that line
                    $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
                                      .'<span style="' . $this->code_style .'">';
                }
                $close += 2;
            }
            //Is this some line with extra styles???
            if (in_array($line_no, $this->highlight_extra_lines)) {
                if ($this->use_classes) {
                    if (isset($this->highlight_extra_lines_styles[$line_no])) {
                        $parsed_code .= "<span class=\"xtra lx$line_no\">";
                    } else {
                        $parsed_code .= "<span class=\"xtra ln-xtra\">";
                    }
                } else {
                    $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($line_no) . "\">";
                }
                ++$close;
            }

            $parsed_code .= $code[$i];

            if ($close) {
                // The wrapping spans are block-level, so no newline is added
                $parsed_code .= str_repeat('</span>', $close);
                $close = 0;
            }
            elseif ($i + 1 < $n) {
                $parsed_code .= "\n";
            }
            unset($code[$i]);
        }

        if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
            $parsed_code .= '</pre>';
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
            $parsed_code .= '</td>';
        }
    }

    $parsed_code .= $this->footer();
}
/**
 * Builds the opening markup of the code block: the container tag with its
 * class/id/style attributes, the optional header content and, when line
 * numbers are enabled, the opening list or table markup.
 *
 * @return string The header for the code block
 * @since  1.0.0
 * @access private
 */
function header() {
    $kind = $this->header_type;
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    /**
     * @todo   Document behaviour change - class is outputted regardless of whether
     *         we're using classes or not. Same with style
     */
    $class_names = $this->language;
    if ($this->overall_class != '') {
        $class_names .= ' ' . $this->overall_class;
    }
    $attributes = ' class="' . $class_names . '"';
    if ($this->overall_id != '') {
        $attributes .= ' id="' . $this->overall_id . '"';
    }
    if ($this->overall_style != '') {
        $attributes .= ' style="' . $this->overall_style . '"';
    }

    // The list only needs a start attribute when numbering does not begin at 1
    $ol_attributes = ($this->line_numbers_start != 1)
        ? ' start="' . $this->line_numbers_start . '"'
        : '';

    // Render the user supplied header content, if there is any
    $header = $this->header_content;
    if ($header) {
        if ($kind == GESHI_HEADER_PRE || $kind == GESHI_HEADER_PRE_VALID) {
            $header = str_replace("\n", '', $header);
        }
        $header = $this->replace_keywords($header);

        $attr = $this->use_classes
            ? ' class="head"'
            : ' style="' . $this->header_content_style . '"';

        if ($kind == GESHI_HEADER_PRE_TABLE && $numbered) {
            $header = '<thead><tr><td colspan="2" ' . $attr . '>' . $header . '</td></tr></thead>';
        } else {
            $header = '<div' . $attr . '>' . $header . '</div>';
        }
    }

    // Extra wrapper that is only opened when no list/table markup is used
    $code_block = $this->force_code_block ? '<div>' : '';

    // No container at all: the attributes end up on the list itself
    if (GESHI_HEADER_NONE == $kind) {
        if ($numbered) {
            return $header . '<ol' . $attributes . $ol_attributes . '>';
        }
        return $header . $code_block;
    }

    // Without line numbers everything except <pre> is wrapped in a <div>
    if (!$numbered) {
        $tag = ($kind == GESHI_HEADER_PRE) ? 'pre' : 'div';
        return '<' . $tag . $attributes . '>' . $header . $code_block;
    }

    if ($kind == GESHI_HEADER_PRE) {
        return '<pre' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
    }
    if ($kind == GESHI_HEADER_DIV || $kind == GESHI_HEADER_PRE_VALID) {
        return '<div' . $attributes . '>' . $header . '<ol' . $ol_attributes . '>';
    }
    if ($kind == GESHI_HEADER_PRE_TABLE) {
        return '<table' . $attributes . '>' . $header . '<tbody><tr class="li1">';
    }
    // Any other header type combined with line numbers yields no value
}
/**
 * Returns the footer for the code block: the optional footer content
 * followed by the closing tags matching whatever header() opened for the
 * current header type / line number combination.
 *
 * @return string The footer for the code block
 * @since  1.0.0
 * @access private
 */
function footer() {
    $numbered = ($this->line_numbers != GESHI_NO_LINE_NUMBERS);

    $footer = $this->footer_content;
    if ($footer) {
        if ($this->header_type == GESHI_HEADER_PRE) {
            $footer = str_replace("\n", '', $footer);
        }
        $footer = $this->replace_keywords($footer);

        if ($this->use_classes) {
            $attr = ' class="foot"';
        } else {
            $attr = " style=\"{$this->footer_content_style}\"";
        }
        if ($this->header_type == GESHI_HEADER_PRE_TABLE && $numbered) {
            // The cell carries the footer class/style, just like the
            // <thead> cell produced by header() carries the header one
            $footer = "<tfoot><tr><td colspan=\"2\"$attr>$footer</td></tr></tfoot>";
        } else {
            $footer = "<div$attr>$footer</div>";
        }
    }

    // header() opens an extra <div> when force_code_block is set and no
    // line numbers are used; it has to be closed for every header type
    $code_block_end = $this->force_code_block ? '</div>' : '';

    if (GESHI_HEADER_NONE == $this->header_type) {
        return $numbered ? '</ol>' . $footer : $code_block_end . $footer;
    }

    if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
        if ($numbered) {
            return "</ol>$footer</div>";
        }
        return $code_block_end . "$footer</div>";
    }

    if ($this->header_type == GESHI_HEADER_PRE_TABLE) {
        if ($numbered) {
            return "</tr></tbody>$footer</table>";
        }
        return $code_block_end . "$footer</div>";
    }

    // Everything else is wrapped in a <pre>
    if ($numbered) {
        return "</ol>$footer</pre>";
    }
    return $code_block_end . "$footer</pre>";
}
/**
 * Substitutes the placeholders <TIME>, <LANGUAGE>, <VERSION> and <SPEED>
 * (and their {...} spellings) in header or footer content with the
 * corresponding values of the current highlighting run.
 *
 * @param  string The header or footer content to do replacement on
 * @return string The header or footer with replaced keywords
 * @since  1.0.2
 * @access private
 */
function replace_keywords($instr) {
    $elapsed = $this->get_time();
    $time_text = number_format($elapsed, 3);
    $language_name = $this->language_data['LANG_NAME'];

    // Throughput can only be computed when some time was actually measured
    if ($elapsed <= 0) {
        $speed_text = 'N/A';
    } else {
        $bytes_per_second = strlen($this->source) / $elapsed;
        $speed_text = ($bytes_per_second >= 1024)
            ? sprintf("%.2f KB/s", $bytes_per_second / 1024.0)
            : sprintf("%.0f B/s", $bytes_per_second);
    }

    // Both arrays are index-aligned: every value is listed once per spelling
    $placeholders = array(
        '<TIME>', '{TIME}',
        '<LANGUAGE>', '{LANGUAGE}',
        '<VERSION>', '{VERSION}',
        '<SPEED>', '{SPEED}'
    );
    $values = array(
        $time_text, $time_text,
        $language_name, $language_name,
        GESHI_VERSION, GESHI_VERSION,
        $speed_text, $speed_text
    );

    return str_replace($placeholders, $values, $instr);
}
/**
 * Secure replacement for PHP built-in function htmlspecialchars().
 *
 * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
 * for this replacement function.
 *
 * The INTERFACE for this function is almost the same as that for
 * htmlspecialchars(), with the same default for quote style; however, there
 * is no 'charset' parameter. The reason for this is as follows:
 *
 * The PHP docs say:
 *      "The third argument charset defines character set used in conversion."
 *
 * I suspect PHP's htmlspecialchars() is working at the byte-value level and
 * thus _needs_ to know (or assume) a character set because the special
 * characters to be replaced could exist at different code points in
 * different character sets. (If indeed htmlspecialchars() works at
 * byte-value level that goes some way towards explaining why the
 * vulnerability would exist in this function, too, and not only in
 * htmlentities() which certainly is working at byte-value level.)
 *
 * This replacement function however works at character level and should
 * therefore be "immune" to character set differences - so no charset
 * parameter is needed or provided. If a third parameter is passed, it will
 * be silently ignored.
 *
 * In the OUTPUT there is a minor difference in that we use '&#39;' instead
 * of PHP's '&#039;' for a single quote: this provides compatibility with
 *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
 * (see comment by mikiwoz at yahoo dot co dot uk on
 * http://php.net/htmlspecialchars); it also matches the entity definition
 * for XML 1.0
 * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
 * Like PHP we use a numeric character reference instead of '&apos;' for the
 * single quote. For the other special characters we use the named entity
 * references, as PHP is doing.
 *
 * In addition to the HTML special characters, ';' and '|' are replaced by
 * the markers '<SEMI>' and '<PIPE>'.
 *
 * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
 *
 * @license     http://www.gnu.org/copyleft/lgpl.html
 *              GNU Lesser General Public License
 * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
 *              Wikka Development Team}
 *
 * @access      private
 * @param       string  $string string to be converted
 * @param       integer $quote_style
 *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
 *                      - ENT_NOQUOTES: escapes only &, < and >
 *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
 * @return      string  converted string
 * @since       1.0.7.18
 */
function hsc($string, $quote_style = ENT_COMPAT) {
    // Base table (ENT_COMPAT set); built once and never modified
    static $aBaseSpecchar = array(
        '&' => '&amp;',
        '"' => '&quot;',
        '<' => '&lt;',
        '>' => '&gt;',

        //This fix is related to SF#1923020, but has to be applied
        //regardless of actually highlighting symbols.

        //Circumvent a bug with symbol highlighting
        //This is required as ; would produce undesirable side-effects if it
        //was not to be processed as an entity.
        ';' => '<SEMI>', // Force ; to be processed as entity
        '|' => '<PIPE>' // Force | to be processed as entity
        );

    // Adjust a per-call copy only: changing the static table itself would
    // leak the quote style of this call into every later call
    $aTransSpecchar = $aBaseSpecchar;

    switch ($quote_style) {
        case ENT_NOQUOTES: // don't convert double quotes
            unset($aTransSpecchar['"']);
            break;
        case ENT_QUOTES: // convert single quotes as well
            $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
            break;
    }

    // return translated string
    return strtr($string, $aTransSpecchar);
}
/**
 * Returns a stylesheet for the highlighted code. If $economy_mode is true,
 * only the declarations that matter for the current configuration of this
 * code block are returned instead of the whole thing.
 *
 * @param  boolean Whether to use economy mode or not
 * @return string  A stylesheet built on the data for the current language
 * @since  1.0.0
 */
function get_stylesheet($economy_mode = true) {
    // If there's an error, chances are that the language file won't have
    // populated the language data, so no stylesheet can be produced
    if ($this->error) {
        return '';
    }

    // Make sure the style rearrangements have been processed; this also
    // works out which style groups are useable
    if (!isset($this->language_data['NUMBERS_CACHE'])) {
        $this->build_style_cache();
    }

    // Selector prefix: the ID wins, otherwise language class plus the
    // optional overall class. The trailing space makes every rule below
    // a descendant selector.
    if ($this->overall_id) {
        $selector = '#' . $this->overall_id;
    } else if ($this->overall_class) {
        $selector = '.' . $this->language . '.' . $this->overall_class;
    } else {
        $selector = '.' . $this->language;
    }
    $selector .= ' ';

    $everything = !$economy_mode;
    $numbered = $everything || $this->line_numbers != GESHI_NO_LINE_NUMBERS;

    // Banner comment of the stylesheet
    $credits = " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
        " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n";
    if ($everything) {
        $css = array("/**\n" .
            " * GeSHi Dynamically Generated Stylesheet\n" .
            " * --------------------------------------\n" .
            " * Dynamically generated stylesheet for {$this->language}\n" .
            " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n" .
            $credits .
            " * --------------------------------------\n" .
            " */\n");
    } else {
        $css = array("/**\n" . $credits . " */\n");
    }

    // Default styles for the code inside numbered lines
    if ($numbered) {
        //$css[] = "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
        $css[] = $selector . '.de1, ' . $selector . '.de2 {' . $this->code_style . "}\n";
    }

    // Overall styles (empty styles are meaningless, economy mode or not)
    if ($this->overall_style != '') {
        $css[] = $selector . ' {' . $this->overall_style . "}\n";
    }

    // Link styles: either the style is empty and no rule is needed, or
    // the key tells which pseudo class it belongs to
    foreach ($this->link_styles as $key => $style) {
        if ($style == '') {
            continue;
        }
        $pseudo = null;
        switch ($key) {
            case GESHI_LINK:
                $pseudo = 'link';
                break;
            case GESHI_HOVER:
                $pseudo = 'hover';
                break;
            case GESHI_ACTIVE:
                $pseudo = 'active';
                break;
            case GESHI_VISITED:
                $pseudo = 'visited';
                break;
        }
        if ($pseudo !== null) {
            $css[] = $selector . 'a:' . $pseudo . ' {' . $style . "}\n";
        }
    }

    // Header, footer and "important" sections
    if ($this->header_content_style != '') {
        $css[] = $selector . '.head {' . $this->header_content_style . "}\n";
    }
    if ($this->footer_content_style != '') {
        $css[] = $selector . '.foot {' . $this->footer_content_style . "}\n";
    }
    if ($this->important_styles != '') {
        $css[] = $selector . '.imp {' . $this->important_styles . "}\n";
    }

    // Simple line number styles
    if ($numbered && $this->line_style1 != '') {
        $css[] = $selector . 'li, ' . $selector . '.li1 {' . $this->line_style1 . "}\n";
    }
    if ($numbered && $this->table_linenumber_style != '') {
        $css[] = $selector . '.ln {' . $this->table_linenumber_style . "}\n";
    }
    // Fancy line number style
    if (($everything || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
        $css[] = $selector . '.li2 {' . $this->line_style2 . "}\n";
    }

    // Keyword groups: enabled per group
    foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
        if ($styles != '' && ($everything ||
            !empty($this->lexic_permissions['KEYWORDS'][$group]))) {
            $css[] = $selector . '.kw' . $group . ' {' . $styles . "}\n";
        }
    }

    // Comment groups: enabled per group, or backed by a comment regexp
    foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
        if ($styles != '' && ($everything ||
            !empty($this->lexic_permissions['COMMENTS'][$group]) ||
            (!empty($this->language_data['COMMENT_REGEXP']) &&
            !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
            $css[] = $selector . '.co' . $group . ' {' . $styles . "}\n";
        }
    }

    // Groups that are switched on or off as a whole:
    // style key => array(class prefix, whether group 'HARD' is written as '_h')
    $simple_groups = array(
        'ESCAPE_CHAR' => array('es', true),  // since 1.0.8: hardescapes
        'BRACKETS'    => array('br', false),
        'SYMBOLS'     => array('sy', false),
        'STRINGS'     => array('st', true),  // since 1.0.8: hardquotes
        'NUMBERS'     => array('nu', false),
        'METHODS'     => array('me', false)
    );
    foreach ($simple_groups as $style_key => $spec) {
        foreach ($this->language_data['STYLES'][$style_key] as $group => $styles) {
            if ($styles != '' && ($everything || $this->lexic_permissions[$style_key])) {
                if ($spec[1] && $group === 'HARD') {
                    $group = '_h';
                }
                $css[] = $selector . '.' . $spec[0] . $group . ' {' . $styles . "}\n";
            }
        }
    }

    // Script delimiters (empty styles are meaningless, economy mode or not)
    foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
        if ($styles != '') {
            $css[] = $selector . '.sc' . $group . ' {' . $styles . "}\n";
        }
    }

    // Regular expression groups: a regexp may name its own CSS class
    foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
        if ($styles != '' && ($everything ||
            !empty($this->lexic_permissions['REGEXPS'][$group]))) {
            if (is_array($this->language_data['REGEXPS'][$group]) &&
                array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
                $class_name = $this->language_data['REGEXPS'][$group][GESHI_CLASS];
            } else {
                $class_name = 're' . $group;
            }
            $css[] = $selector . '.' . $class_name . ' {' . $styles . "}\n";
        }
    }

    // Lines being highlighted extra: the generic rule is only needed in
    // economy mode when not every extra line has a style of its own
    if ($everything ||
        count($this->highlight_extra_lines) != count($this->highlight_extra_lines_styles)) {
        $css[] = $selector . '.ln-xtra, ' . $selector . 'li.ln-xtra, ' . $selector . 'div.ln-xtra {' .
            $this->highlight_extra_lines_style . "}\n";
    }
    $css[] = $selector . "span.xtra { display:block; }\n";
    foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
        $css[] = $selector . '.lx' . $lineid . ', ' . $selector . 'li.lx' . $lineid . ', ' .
            $selector . 'div.lx' . $lineid . ' {' . $linestyle . "}\n";
    }

    return implode('', $css);
}
/**
 * Gets the style that is used for the specified line: the style registered
 * for that particular line if there is one, otherwise the general style
 * for extra highlighted lines.
 *
 * @param  int    The line number information is requested for
 * @return string The style for that line
 * @access private
 * @since  1.0.7.21
 */
function get_line_style($line) {
    if (isset($this->highlight_extra_lines_styles[$line])) {
        return $this->highlight_extra_lines_styles[$line];
    }

    // no "extra" style assigned to this line
    return $this->highlight_extra_lines_style;
}
/**
 * Creates an optimized list of regular expressions out of an array of
 * strings by merging entries that share a common prefix.
 *
 * Example (schematic):
 * <code>$list = array('faa', 'foo', 'foobar');
 * => string 'f(aa|oo(bar)?)'</code>
 *
 * The result is split over several array elements whenever the collected
 * tokens exceed GESHI_MAX_PCRE_LENGTH or the number of subpatterns would
 * exceed GESHI_MAX_PCRE_SUBPATTERNS.
 *
 * @param $list array of (unquoted) strings
 * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
 * @return array list of strings for use in regular expressions
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function optimize_regexp_list($list, $regexp_delimiter = '/') {
    // characters a split-off key part must not start with
    $special_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
        '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
    sort($list);

    $patterns = array('');
    $pattern_idx = 0;
    $subpattern_count = 0;

    // nested token array the patterns are generated from
    $trie = array();
    // keys leading to the most recently inserted entry, one per nesting level
    $key_path = array();
    // rough length of what is currently stored in $trie
    $trie_len = 0;

    for ($i = 0, $total = count($list); $i < $total; ++$i) {
        if ($trie_len > GESHI_MAX_PCRE_LENGTH) {
            // the pcre is growing too long: start a new pattern
            $patterns[++$pattern_idx] = $this->_optimize_regexp_list_tokens_to_string($trie);
            $subpattern_count = substr_count($patterns[$pattern_idx], '(?:');
            $trie = array();
            $trie_len = 0;
        }

        $depth = 0;
        $rest = preg_quote((string) $list[$i], $regexp_delimiter);
        $node = &$trie;

        // descend along the previous entry's keys as far as the new entry
        // shares a prefix with them, splitting keys where necessary
        while (true) {
            if (isset($key_path[$depth])) {
                $key = $key_path[$depth];
                if ($key == $rest) {
                    // duplicate entry, skip it
                    continue 2;
                }

                // length of the prefix shared with the key on this level
                $common = 0;
                while (isset($rest[$common]) && isset($key[$common])
                    && $rest[$common] == $key[$common]) {
                    ++$common;
                }

                if ($common > 0) {
                    if ($common == strlen($key)) {
                        // the key is matched completely, the entry just
                        // has some more characters appended
                        $node = &$node[$key];
                    } else {
                        // only part of the key matches
                        $shared = substr($key, 0, $common);
                        $tail = substr($key, $common);

                        if (in_array($shared[0], $special_chars)
                            || in_array($tail[0], $special_chars)) {
                            // a regex char would become the first character
                            // of a key: store the entry unsplit instead
                            $node[$rest] = array('' => true);
                            array_splice($key_path, $depth, count($key_path), $rest);
                            $trie_len += strlen($rest);
                            continue;
                        }

                        // move the previous tokens below the shared prefix
                        $node[$shared] = array($tail => $node[$key]);
                        unset($node[$key]);
                        $node = &$node[$shared];
                        // rebuild the key index from this level on
                        array_splice($key_path, $depth, count($key_path), array($shared, $tail));
                        $trie_len += strlen($tail);
                    }
                    ++$depth;
                    $rest = substr($rest, $common);
                    continue;
                }
                // nothing in common on this level: fall through
            }

            if ($depth == 0 && !empty($trie)) {
                // nothing shared with the collected tokens at all: dump
                // them into the pattern list and start over
                $chunk = $this->_optimize_regexp_list_tokens_to_string($trie);
                $chunk_subpatterns = substr_count($chunk, '(?:');
                if (GESHI_MAX_PCRE_SUBPATTERNS
                    && $subpattern_count + $chunk_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
                    $patterns[++$pattern_idx] = $chunk;
                    $subpattern_count = $chunk_subpatterns;
                } else {
                    $patterns[$pattern_idx] .= (empty($patterns[$pattern_idx]) ? '' : '|') . $chunk;
                    $subpattern_count += $chunk_subpatterns;
                }
                $trie = array();
                $trie_len = 0;
            }

            // store what is left of the entry as a new leaf
            $node[$rest] = array('' => true);
            array_splice($key_path, $depth, count($key_path), $rest);
            $trie_len += strlen($rest);
            break;
        }
        unset($list[$i]);
    }

    // convert whatever tokens are left over
    $chunk = $this->_optimize_regexp_list_tokens_to_string($trie);
    if (GESHI_MAX_PCRE_SUBPATTERNS
        && $subpattern_count + substr_count($chunk, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
        $patterns[++$pattern_idx] = $chunk;
    } else {
        $patterns[$pattern_idx] .= (empty($patterns[$pattern_idx]) ? '' : '|') . $chunk;
    }
    return $patterns;
}
/**
 * Creates the regexp string for a token array.
 * You should not call this function directly, @see $this->optimize_regexp_list().
 *
 * Every key of $tokens is emitted followed by its nested tokens as a
 * non-capturing group; the group is made optional when the token is also
 * a complete entry on its own (marked by the '' key).
 *
 * @param &$tokens array of tokens
 * @param $recursed bool to know whether we recursed or not
 * @return string
 * @author Milian Wolff <mail@milianw.de>
 * @since 1.0.8
 * @access private
 */
function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
    $list = '';
    foreach ($tokens as $token => $sub_tokens) {
        $list .= $token;
        $close_entry = isset($sub_tokens['']);
        unset($sub_tokens['']);
        if (!empty($sub_tokens)) {
            $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
            if ($close_entry) {
                // make sub_tokens optional
                $list .= '?';
            }
        }
        $list .= '|';
    }
    if (!$recursed) {
        // do some optimizations, on the complete string only
        // (?:p)? => p?
        $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
        // (?:a|b|c|d|...)? => [abcd...]?
        // TODO: a|bb|c => [ac]|bb
        // A method callback is used instead of create_function(), which
        // is no longer available in current PHP versions
        $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#',
            array($this, '_optimize_regexp_list_alternation_to_class'), $list);
    }
    // return $list without trailing pipe
    return substr($list, 0, -1);
}

/**
 * Callback for _optimize_regexp_list_tokens_to_string(): turns the matched
 * alternation of single characters into a character class.
 *
 * @param $matches array match data, [1] holds the alternation "a|b|c"
 * @return string the character class "[abc]"
 * @access private
 */
function _optimize_regexp_list_alternation_to_class($matches) {
    return '[' . str_replace('|', '', $matches[1]) . ']';
}
4612 } // End Class GeSHi
if (!function_exists('geshi_highlight')) {
    /**
     * Easy way to highlight stuff. Behaves just like highlight_string
     *
     * @param string  The code to highlight
     * @param string  The language to highlight the code in
     * @param string  The path to the language files. Can be left out; as from
     *                version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result or to echo
     * @return string|boolean The code highlighted (if $return is true),
     *                otherwise false if the highlighter reported an error
     *                and true if it did not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $highlighter = new GeSHi($string, $language, $path);
        $highlighter->set_header_type(GESHI_HEADER_NONE);

        $markup = '<code>' . $highlighter->parse_code() . '</code>';
        if ($return) {
            return $markup;
        }

        echo $markup;

        return !$highlighter->error();
    }
}