inc/geshi.php

   1 <?php
   2 /**
   3  * GeSHi - Generic Syntax Highlighter
   4  *
   5  * The GeSHi class for Generic Syntax Highlighting. Please refer to the
   6  * documentation at http://qbnz.com/highlighter/documentation.php for more
   7  * information about how to use this class.
   8  *
   9  * For changes, release notes, TODOs etc, see the relevant files in the docs/
  10  * directory.
  11  *
  12  *   This file is part of GeSHi.
  13  *
  14  *  GeSHi is free software; you can redistribute it and/or modify
  15  *  it under the terms of the GNU General Public License as published by
  16  *  the Free Software Foundation; either version 2 of the License, or
  17  *  (at your option) any later version.
  18  *
  19  *  GeSHi is distributed in the hope that it will be useful,
  20  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  *  GNU General Public License for more details.
  23  *
  24  *  You should have received a copy of the GNU General Public License
  25  *  along with GeSHi; if not, write to the Free Software
  26  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  27  *
  28  * @package    geshi
  29  * @subpackage core
  30  * @author     Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
  31  * @copyright  (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
  32  * @license    http://gnu.org/copyleft/gpl.html GNU GPL
  33  *
  34  */
  35
  36 //
  37 // GeSHi Constants
  38 // You should use these constant names in your programs instead of
  39 // their values - you never know when a value may change in a future
  40 // version
  41 //
  42
  43 /** The version of this GeSHi file */
  44 define('GESHI_VERSION', '1.0.8.4');
  45
  46 // Define the root directory for the GeSHi code tree
  47 if (!defined('GESHI_ROOT')) {
  48     /** The root directory for GeSHi */
  49     define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
  50 }
  51 /** The language file directory for GeSHi
  52     @access private */
  53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
  54
  55 // Define if GeSHi should be paranoid about security
  56 if (!defined('GESHI_SECURITY_PARANOID')) {
  57     /** Tells GeSHi to be paranoid about security settings */
  58     define('GESHI_SECURITY_PARANOID', false);
  59 }
  60
  61 // Line numbers - use with enable_line_numbers()
  62 /** Use no line numbers when building the result */
  63 define('GESHI_NO_LINE_NUMBERS', 0);
  64 /** Use normal line numbers when building the result */
  65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
  66 /** Use fancy line numbers when building the result */
  67 define('GESHI_FANCY_LINE_NUMBERS', 2);
  68
  69 // Container HTML type
  70 /** Use nothing to surround the source */
  71 define('GESHI_HEADER_NONE', 0);
  72 /** Use a "div" to surround the source */
  73 define('GESHI_HEADER_DIV', 1);
  74 /** Use a "pre" to surround the source */
  75 define('GESHI_HEADER_PRE', 2);
  76 /** Use a pre to wrap lines when line numbers are enabled or to wrap the whole code. */
  77 define('GESHI_HEADER_PRE_VALID', 3);
  78 /**
  79  * Use a "table" to surround the source:
  80  *
  81  *  <table>
  82  *    <thead><tr><td colspan="2">$header</td></tr></thead>
  83  *    <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
  84  *    <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
  85  *  </table>
  86  *
  87  * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
  88  * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
  89  * @note when linenumbers are disabled this is essentially the same as GESHI_HEADER_PRE
  90  */
  91 define('GESHI_HEADER_PRE_TABLE', 4);
  92
  93 // Capitalisation constants
  94 /** Leave keywords found as the case that they are */
  95 define('GESHI_CAPS_NO_CHANGE', 0);
  96 /** Uppercase keywords found */
  97 define('GESHI_CAPS_UPPER', 1);
  98 /** Lowercase keywords found */
  99 define('GESHI_CAPS_LOWER', 2);
 100
 101 // Link style constants
 102 /** Links in the source in the :link state */
 103 define('GESHI_LINK', 0);
 104 /** Links in the source in the :hover state */
 105 define('GESHI_HOVER', 1);
 106 /** Links in the source in the :active state */
 107 define('GESHI_ACTIVE', 2);
 108 /** Links in the source in the :visited state */
 109 define('GESHI_VISITED', 3);
 110
 111 // Important string starter/finisher
 112 // Note that if you change these, they should be as-is: i.e., don't
 113 // write them as if they had been run through htmlentities()
 114 /** The starter for important parts of the source */
 115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
 116 /** The ender for important parts of the source */
 117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
 118
 119 /**#@+
 120  *  @access private
 121  */
 122 // When strict mode applies for a language
 123 /** Strict mode never applies (this is the most common) */
 124 define('GESHI_NEVER', 0);
 125 /** Strict mode *might* apply, and can be enabled or
 126     disabled by {@link GeSHi->enable_strict_mode()} */
 127 define('GESHI_MAYBE', 1);
 128 /** Strict mode always applies */
 129 define('GESHI_ALWAYS', 2);
 130
 131 // Advanced regexp handling constants, used in language files
 132 /** The key of the regex array defining what to search for */
 133 define('GESHI_SEARCH', 0);
 134 /** The key of the regex array defining what bracket group in a
 135     matched search to use as a replacement */
 136 define('GESHI_REPLACE', 1);
 137 /** The key of the regex array defining any modifiers to the regular expression */
 138 define('GESHI_MODIFIERS', 2);
 139 /** The key of the regex array defining what bracket group in a
 140     matched search to put before the replacement */
 141 define('GESHI_BEFORE', 3);
 142 /** The key of the regex array defining what bracket group in a
 143     matched search to put after the replacement */
 144 define('GESHI_AFTER', 4);
 145 /** The key of the regex array defining a custom keyword to use
 146     for this regexp's html tag class */
 147 define('GESHI_CLASS', 5);
 148
 149 /** Used in language files to mark comments */
 150 define('GESHI_COMMENTS', 0);
 151
 152 /** Used to work around missing PHP features **/
 153 define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));
 154
 155 /** make sure we can call stripos **/
 156 if (!function_exists('stripos')) {
 157     // the offset param of preg_match is not supported below PHP 4.3.3
 158     if (GESHI_PHP_PRE_433) {
 159         /**
 160          * @ignore
 161          */
 162         function stripos($haystack, $needle, $offset = null) {
 163             if (!is_null($offset)) {
 164                 $haystack = substr($haystack, $offset);
 165             }
 166             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE)) {
 167                 return $match[0][1];
 168             }
 169             return false;
 170         }
 171     }
 172     else {
 173         /**
 174          * @ignore
 175          */
 176         function stripos($haystack, $needle, $offset = null) {
 177             if (preg_match('/'. preg_quote($needle, '/') . '/', $haystack, $match, PREG_OFFSET_CAPTURE, $offset)) {
 178                 return $match[0][1];
 179             }
 180             return false;
 181         }
 182     }
 183 }
 184
 185 /** Some old PHP / PCRE versions only support a limited number of subpatterns in
 186     regular expressions. Set this to false if your PCRE lib is up to date
 187     @see GeSHi->optimize_regexp_list()
 188     **/
 189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
 190 /** it's also important not to generate too long regular expressions
 191     be generous here... but keep in mind, that when reaching this limit we
 192     still have to close open patterns. 12k should do just fine on a 16k limit.
 193     @see GeSHi->optimize_regexp_list()
 194     **/
 195 define('GESHI_MAX_PCRE_LENGTH', 12288);
 196
 197 //Number format specification
 198 /** Basic number format for integers */
 199 define('GESHI_NUMBER_INT_BASIC', 1);        //Default integers \d+
 200 /** Enhanced number format for integers like seen in C */
 201 define('GESHI_NUMBER_INT_CSTYLE', 2);       //Default C-Style \d+[lL]?
 202 /** Number format to highlight binary numbers with a suffix "b" */
 203 define('GESHI_NUMBER_BIN_SUFFIX', 16);           //[01]+[bB]
 204 /** Number format to highlight binary numbers with a prefix % */
 205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32);   //%[01]+
 206 /** Number format to highlight binary numbers with a prefix 0b (C) */
 207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64);        //0b[01]+
 208 /** Number format to highlight octal numbers with a leading zero */
 209 define('GESHI_NUMBER_OCT_PREFIX', 256);           //0[0-7]+
 210 /** Number format to highlight octal numbers with a suffix of o */
 211 define('GESHI_NUMBER_OCT_SUFFIX', 512);           //[0-7]+[oO]
 212 /** Number format to highlight hex numbers with a prefix 0x */
 213 define('GESHI_NUMBER_HEX_PREFIX', 4096);           //0x[0-9a-fA-F]+
 214 /** Number format to highlight hex numbers with a suffix of h */
 215 define('GESHI_NUMBER_HEX_SUFFIX', 8192);           //[0-9][0-9a-fA-F]*h
 216 /** Number format to highlight floating-point numbers without support for scientific notation */
 217 define('GESHI_NUMBER_FLT_NONSCI', 65536);          //\d+\.\d+
 218 /** Number format to highlight floating-point numbers with a suffix "f", without support for scientific notation */
 219 define('GESHI_NUMBER_FLT_NONSCI_F', 131072);       //\d+(\.\d+)?f
 220 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
 221 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144);      //\.\d+e\d+
 222 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
 223 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288);       //\d+(\.\d+)?e\d+
 224 //Custom formats are passed by RX array
 225
 226 // Error detection - use these to analyse faults
 227 /** No sourcecode to highlight was specified
 228  * @deprecated
 229  */
 230 define('GESHI_ERROR_NO_INPUT', 1);
 231 /** The language specified does not exist */
 232 define('GESHI_ERROR_NO_SUCH_LANG', 2);
 233 /** GeSHi could not open a file for reading (generally a language file) */
 234 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
 235 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
 236 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
 237 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
 238 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
 239 /**#@-*/
 240
 241
 242 /**
 243  * The GeSHi Class.
 244  *
 245  * Please refer to the documentation for GeSHi 1.0.X that is available
 246  * at http://qbnz.com/highlighter/documentation.php for more information
 247  * about how to use this class.
 248  *
 249  * @package   geshi
 250  * @author    Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
 251  * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
 252  */
 253 class GeSHi {
 254     /**#@+
 255      * @access private
 256      */
 257     /**
 258      * The source code to highlight
 259      * @var string
 260      */
 261     var $source = '';
 262
 263     /**
 264      * The language to use when highlighting
 265      * @var string
 266      */
 267     var $language = '';
 268
 269     /**
 270      * The data for the language used
 271      * @var array
 272      */
 273     var $language_data = array();
 274
 275     /**
 276      * The path to the language files
 277      * @var string
 278      */
 279     var $language_path = GESHI_LANG_ROOT;
 280
 281     /**
 282      * The error code of the last error, or false if no error has occurred
 283      * @var int|false
 284      * @todo check err reporting works
 285      */
 286     var $error = false;
 287
 288     /**
 289      * Possible error messages
 290      * @var array
 291      */
 292     var $error_messages = array(
 293         GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
 294         GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
 295         GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
 296         GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
 297     );
 298
 299     /**
 300      * Whether highlighting is strict or not
 301      * @var boolean
 302      */
 303     var $strict_mode = false;
 304
 305     /**
 306      * Whether to use CSS classes in output
 307      * @var boolean
 308      */
 309     var $use_classes = false;
 310
 311     /**
 312      * The type of header to use. Can be one of the following
 313      * values:
 314      *
 315      * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
 316      * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
 317      * - GESHI_HEADER_NONE: No header is outputted.
 318      *
 319      * @var int
 320      */
 321     var $header_type = GESHI_HEADER_PRE;
 322
 323     /**
 324      * Array of permissions for which lexics should be highlighted
 325      * @var array
 326      */
 327     var $lexic_permissions = array(
 328         'KEYWORDS' =>    array(),
 329         'COMMENTS' =>    array('MULTI' => true),
 330         'REGEXPS' =>     array(),
 331         'ESCAPE_CHAR' => true,
 332         'BRACKETS' =>    true,
 333         'SYMBOLS' =>     false,
 334         'STRINGS' =>     true,
 335         'NUMBERS' =>     true,
 336         'METHODS' =>     true,
 337         'SCRIPT' =>      true
 338     );
 339
 340     /**
 341      * The time it took to parse the code
 342      * @var double
 343      */
 344     var $time = 0;
 345
 346     /**
 347      * The content of the header block
 348      * @var string
 349      */
 350     var $header_content = '';
 351
 352     /**
 353      * The content of the footer block
 354      * @var string
 355      */
 356     var $footer_content = '';
 357
 358     /**
 359      * The style of the header block
 360      * @var string
 361      */
 362     var $header_content_style = '';
 363
 364     /**
 365      * The style of the footer block
 366      * @var string
 367      */
 368     var $footer_content_style = '';
 369
 370     /**
 371      * Tells if a block around the highlighted source should be forced
 372      * if not using line numbering
 373      * @var boolean
 374      */
 375     var $force_code_block = false;
 376
 377     /**
 378      * The styles for hyperlinks in the code
 379      * @var array
 380      */
 381     var $link_styles = array();
 382
 383     /**
 384      * Whether important blocks should be recognised or not
 385      * @var boolean
 386      * @deprecated
 387      * @todo REMOVE THIS FUNCTIONALITY!
 388      */
 389     var $enable_important_blocks = false;
 390
 391     /**
 392      * Styles for important parts of the code
 393      * @var string
 394      * @deprecated
 395      * @todo As above - rethink the whole idea of important blocks as it is buggy and
 396      * will be hard to implement in 1.2
 397      */
 398     var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
 399
 400     /**
 401      * Whether CSS IDs should be added to the code
 402      * @var boolean
 403      */
 404     var $add_ids = false;
 405
 406     /**
 407      * Lines that should be highlighted extra
 408      * @var array
 409      */
 410     var $highlight_extra_lines = array();
 411
 412     /**
 413      * Styles of lines that should be highlighted extra
 414      * @var array
 415      */
 416     var $highlight_extra_lines_styles = array();
 417
 418     /**
 419      * Styles of extra-highlighted lines
 420      * @var string
 421      */
 422     var $highlight_extra_lines_style = 'background-color: #ffc;';
 423
 424     /**
 425      * The line ending
 426      * If null, nl2br() will be used on the result string.
 427      * Otherwise, all instances of \n will be replaced with $line_ending
 428      * @var string
 429      */
 430     var $line_ending = null;
 431
 432     /**
 433      * Number at which line numbers should start at
 434      * @var int
 435      */
 436     var $line_numbers_start = 1;
 437
 438     /**
 439      * The overall style for this code block
 440      * @var string
 441      */
 442     var $overall_style = 'font-family:monospace;';
 443
 444     /**
 445      *  The style for the actual code
 446      * @var string
 447      */
 448     var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
 449
 450     /**
 451      * The overall class for this code block
 452      * @var string
 453      */
 454     var $overall_class = '';
 455
 456     /**
 457      * The overall ID for this code block
 458      * @var string
 459      */
 460     var $overall_id = '';
 461
 462     /**
 463      * Line number styles
 464      * @var string
 465      */
 466     var $line_style1 = 'font-weight: normal; vertical-align:top;';
 467
 468     /**
 469      * Line number styles for fancy lines
 470      * @var string
 471      */
 472     var $line_style2 = 'font-weight: bold; vertical-align:top;';
 473
 474     /**
 475      * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
 476      * @var string
 477      */
 478     var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
 479
 480     /**
 481      * Flag for how line numbers are displayed
 482      * @var int
 483      */
 484     var $line_numbers = GESHI_NO_LINE_NUMBERS;
 485
 486     /**
 487      * Flag to decide if multi line spans are allowed. Set it to false to make sure
 488      * each tag is closed before and reopened after each linefeed.
 489      * @var boolean
 490      */
 491     var $allow_multiline_span = true;
 492
 493     /**
 494      * The "nth" value for fancy line highlighting
 495      * @var int
 496      */
 497     var $line_nth_row = 0;
 498
 499     /**
 500      * The size of tab stops
 501      * @var int
 502      */
 503     var $tab_width = 8;
 504
 505     /**
 506      * Should we use language-defined tab stop widths?
 507      * @var boolean
 508      */
 509     var $use_language_tab_width = false;
 510
 511     /**
 512      * Default target for keyword links
 513      * @var string
 514      */
 515     var $link_target = '';
 516
 517     /**
 518      * The encoding to use for entity encoding
 519      * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
 520      * @var string
 521      */
 522     var $encoding = 'utf-8';
 523
 524     /**
 525      * Should keywords be linked?
 526      * @var boolean
 527      */
 528     var $keyword_links = true;
 529
 530     /**
 531      * Currently loaded language file
 532      * @var string
 533      * @since 1.0.7.22
 534      */
 535     var $loaded_language = '';
 536
 537     /**
 538      * Whether the caches needed for parsing are built or not
 539      *
 540      * @var bool
 541      * @since 1.0.8
 542      */
 543     var $parse_cache_built = false;
 544
 545     /**
 546      * Work around for Suhosin Patch with disabled /e modifier
 547      *
 548      * Note from suhosins author in config file:
 549      * <blockquote>
 550      *   The /e modifier inside <code>preg_replace()</code> allows code execution.
 551      *   Often it is the cause for remote code execution exploits. It is wise to
 552      *   deactivate this feature and test where in the application it is used.
 553      *   The developer using the /e modifier should be made aware that he should
 554      *   use <code>preg_replace_callback()</code> instead
 555      * </blockquote>
 556      *
 557      * @var array
 558      * @since 1.0.8
 559      */
 560     var $_kw_replace_group = 0;
 561     var $_rx_key = 0;
 562
 563     /**
 564      * some "callback parameters" for handle_multiline_regexps
 565      *
 566      * @since 1.0.8
 567      * @access private
 568      * @var string
 569      */
 570     var $_hmr_before = '';
 571     var $_hmr_replace = '';
 572     var $_hmr_after = '';
 573     var $_hmr_key = 0;
 574
 575     /**#@-*/
 576
 577     /**
 578      * Creates a new GeSHi object, with source and language
 579      *
 580      * @param string The source code to highlight
 581      * @param string The language to highlight the source with
 582      * @param string The path to the language file directory. <b>This
 583      *               is deprecated!</b> I've backported the auto path
 584      *               detection from the 1.1.X dev branch, so now it
 585      *               should be automatically set correctly. If you have
 586      *               renamed the language directory however, you will
 587      *               still need to set the path using this parameter or
 588      *               {@link GeSHi->set_language_path()}
 589      * @since 1.0.0
 590      */
 591     function GeSHi($source = '', $language = '', $path = '') {
 592         if (!empty($source)) {
 593             $this->set_source($source);
 594         }
 595         if (!empty($language)) {
 596             $this->set_language($language);
 597         }
 598         $this->set_language_path($path);
 599     }
 600
 601     /**
 602      * Returns an error message associated with the last GeSHi operation,
 603      * or false if no error has occured
 604      *
 605      * @return string|false An error message if there has been an error, else false
 606      * @since  1.0.0
 607      */
 608     function error() {
 609         if ($this->error) {
 610             //Put some template variables for debugging here ...
 611             $debug_tpl_vars = array(
 612                 '{LANGUAGE}' => $this->language,
 613                 '{PATH}' => $this->language_path
 614             );
 615             $msg = str_replace(
 616                 array_keys($debug_tpl_vars),
 617                 array_values($debug_tpl_vars),
 618                 $this->error_messages[$this->error]);
 619
 620             return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
 621         }
 622         return false;
 623     }
 624
 625     /**
 626      * Gets a human-readable language name (thanks to Simon Patterson
 627      * for the idea :))
 628      *
 629      * @return string The name for the current language
 630      * @since  1.0.2
 631      */
 632     function get_language_name() {
 633         if (GESHI_ERROR_NO_SUCH_LANG == $this->error) {
 634             return $this->language_data['LANG_NAME'] . ' (Unknown Language)';
 635         }
 636         return $this->language_data['LANG_NAME'];
 637     }
 638
 639     /**
 640      * Sets the source code for this object
 641      *
 642      * @param string The source code to highlight
 643      * @since 1.0.0
 644      */
 645     function set_source($source) {
 646         $this->source = $source;
 647         $this->highlight_extra_lines = array();
 648     }
 649
 650     /**
 651      * Sets the language for this object
 652      *
 653      * @note since 1.0.8 this function won't reset language-settings by default anymore!
 654      *       if you need this set $force_reset = true
 655      *
 656      * @param string The name of the language to use
 657      * @since 1.0.0
 658      */
 659     function set_language($language, $force_reset = false) {
 660         if ($force_reset) {
 661             $this->loaded_language = false;
 662         }
 663
 664         //Clean up the language name to prevent malicious code injection
 665         $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
 666
 667         $language = strtolower($language);
 668
 669         //Retreive the full filename
 670         $file_name = $this->language_path . $language . '.php';
 671         if ($file_name == $this->loaded_language) {
 672             // this language is already loaded!
 673             return;
 674         }
 675
 676         $this->language = $language;
 677
 678         $this->error = false;
 679         $this->strict_mode = GESHI_NEVER;
 680
 681         //Check if we can read the desired file
 682         if (!is_readable($file_name)) {
 683             $this->error = GESHI_ERROR_NO_SUCH_LANG;
 684             return;
 685         }
 686
 687         // Load the language for parsing
 688         $this->load_language($file_name);
 689     }
 690
 691     /**
 692      * Sets the path to the directory containing the language files. Note
 693      * that this path is relative to the directory of the script that included
 694      * geshi.php, NOT geshi.php itself.
 695      *
 696      * @param string The path to the language directory
 697      * @since 1.0.0
 698      * @deprecated The path to the language files should now be automatically
 699      *             detected, so this method should no longer be needed. The
 700      *             1.1.X branch handles manual setting of the path differently
 701      *             so this method will disappear in 1.2.0.
 702      */
 703     function set_language_path($path) {
 704         if(strpos($path,':')) {
 705             //Security Fix to prevent external directories using fopen wrappers.
 706             if(DIRECTORY_SEPARATOR == "\\") {
 707                 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
 708                     return;
 709                 }
 710             } else {
 711                 return;
 712             }
 713         }
 714         if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
 715             //Security Fix to prevent external directories using fopen wrappers.
 716             return;
 717         }
 718         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
 719             //Security Fix to prevent external directories using fopen wrappers.
 720             return;
 721         }
 722         if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
 723             //Security Fix to prevent external directories using fopen wrappers.
 724             return;
 725         }
 726         if ($path) {
 727             $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
 728             $this->set_language($this->language); // otherwise set_language_path has no effect
 729         }
 730     }
 731
 732     /**
 733      * Sets the type of header to be used.
 734      *
 735      * If GESHI_HEADER_DIV is used, the code is surrounded in a "div".This
 736      * means more source code but more control over tab width and line-wrapping.
 737      * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
 738      * control. Default is GESHI_HEADER_PRE.
 739      *
 740      * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
 741      * should be outputted.
 742      *
 743      * @param int The type of header to be used
 744      * @since 1.0.0
 745      */
 746     function set_header_type($type) {
 747         //Check if we got a valid header type
 748         if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
 749             GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
 750             $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
 751             return;
 752         }
 753
 754         //Set that new header type
 755         $this->header_type = $type;
 756     }
 757
 758     /**
 759      * Sets the styles for the code that will be outputted
 760      * when this object is parsed. The style should be a
 761      * string of valid stylesheet declarations
 762      *
 763      * @param string  The overall style for the outputted code block
 764      * @param boolean Whether to merge the styles with the current styles or not
 765      * @since 1.0.0
 766      */
 767     function set_overall_style($style, $preserve_defaults = false) {
 768         if (!$preserve_defaults) {
 769             $this->overall_style = $style;
 770         } else {
 771             $this->overall_style .= $style;
 772         }
 773     }
 774
 775     /**
 776      * Sets the overall classname for this block of code. This
 777      * class can then be used in a stylesheet to style this object's
 778      * output
 779      *
 780      * @param string The class name to use for this block of code
 781      * @since 1.0.0
 782      */
 783     function set_overall_class($class) {
 784         $this->overall_class = $class;
 785     }
 786
 787     /**
 788      * Sets the overall id for this block of code. This id can then
 789      * be used in a stylesheet to style this object's output
 790      *
 791      * @param string The ID to use for this block of code
 792      * @since 1.0.0
 793      */
 794     function set_overall_id($id) {
 795         $this->overall_id = $id;
 796     }
 797
 798     /**
 799      * Sets whether CSS classes should be used to highlight the source. Default
 800      * is off, calling this method with no arguments will turn it on
 801      *
 802      * @param boolean Whether to turn classes on or not
 803      * @since 1.0.0
 804      */
 805     function enable_classes($flag = true) {
 806         $this->use_classes = ($flag) ? true : false;
 807     }
 808
 809     /**
 810      * Sets the style for the actual code. This should be a string
 811      * containing valid stylesheet declarations. If $preserve_defaults is
 812      * true, then styles are merged with the default styles, with the
 813      * user defined styles having priority
 814      *
 815      * Note: Use this method to override any style changes you made to
 816      * the line numbers if you are using line numbers, else the line of
 817      * code will have the same style as the line number! Consult the
 818      * GeSHi documentation for more information about this.
 819      *
 820      * @param string  The style to use for actual code
 821      * @param boolean Whether to merge the current styles with the new styles
 822      * @since 1.0.2
 823      */
 824     function set_code_style($style, $preserve_defaults = false) {
 825         if (!$preserve_defaults) {
 826             $this->code_style = $style;
 827         } else {
 828             $this->code_style .= $style;
 829         }
 830     }
 831
 832     /**
 833      * Sets the styles for the line numbers.
 834      *
 835      * @param string The style for the line numbers that are "normal"
 836      * @param string|boolean If a string, this is the style of the line
 837      *        numbers that are "fancy", otherwise if boolean then this
 838      *        defines whether the normal styles should be merged with the
 839      *        new normal styles or not
 840      * @param boolean If set, is the flag for whether to merge the "fancy"
 841      *        styles with the current styles or not
 842      * @since 1.0.2
 843      */
 844     function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
 845         //Check if we got 2 or three parameters
 846         if (is_bool($style2)) {
 847             $preserve_defaults = $style2;
 848             $style2 = '';
 849         }
 850
 851         //Actually set the new styles
 852         if (!$preserve_defaults) {
 853             $this->line_style1 = $style1;
 854             $this->line_style2 = $style2;
 855         } else {
 856             $this->line_style1 .= $style1;
 857             $this->line_style2 .= $style2;
 858         }
 859     }
 860
 861     /**
 862      * Sets whether line numbers should be displayed.
 863      *
 864      * Valid values for the first parameter are:
 865      *
 866      *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 867      *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 868      *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 869      *
 870      * For fancy line numbers, the second parameter is used to signal which lines
 871      * are to be fancy. For example, if the value of this parameter is 5 then every
 872      * 5th line will be fancy.
 873      *
 874      * @param int How line numbers should be displayed
 875      * @param int Defines which lines are fancy
 876      * @since 1.0.0
 877      */
 878     function enable_line_numbers($flag, $nth_row = 5) {
 879         if (GESHI_NO_LINE_NUMBERS != $flag && GESHI_NORMAL_LINE_NUMBERS != $flag
 880             && GESHI_FANCY_LINE_NUMBERS != $flag) {
 881             $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
 882         }
 883         $this->line_numbers = $flag;
 884         $this->line_nth_row = $nth_row;
 885     }
 886
 887     /**
 888      * Sets wether spans and other HTML markup generated by GeSHi can
 889      * span over multiple lines or not. Defaults to true to reduce overhead.
 890      * Set it to false if you want to manipulate the output or manually display
 891      * the code in an ordered list.
 892      *
 893      * @param boolean Wether multiline spans are allowed or not
 894      * @since 1.0.7.22
 895      */
 896     function enable_multiline_span($flag) {
 897         $this->allow_multiline_span = (bool) $flag;
 898     }
 899
 900     /**
 901      * Get current setting for multiline spans, see GeSHi->enable_multiline_span().
 902      *
 903      * @see enable_multiline_span
 904      * @return bool
 905      */
 906     function get_multiline_span() {
 907         return $this->allow_multiline_span;
 908     }
 909
 910     /**
 911      * Sets the style for a keyword group. If $preserve_defaults is
 912      * true, then styles are merged with the default styles, with the
 913      * user defined styles having priority
 914      *
 915      * @param int     The key of the keyword group to change the styles of
 916      * @param string  The style to make the keywords
 917      * @param boolean Whether to merge the new styles with the old or just
 918      *                to overwrite them
 919      * @since 1.0.0
 920      */
 921     function set_keyword_group_style($key, $style, $preserve_defaults = false) {
 922         //Set the style for this keyword group
 923         if (!$preserve_defaults) {
 924             $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
 925         } else {
 926             $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
 927         }
 928
 929         //Update the lexic permissions
 930         if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
 931             $this->lexic_permissions['KEYWORDS'][$key] = true;
 932         }
 933     }
 934
 935     /**
 936      * Turns highlighting on/off for a keyword group
 937      *
 938      * @param int     The key of the keyword group to turn on or off
 939      * @param boolean Whether to turn highlighting for that group on or off
 940      * @since 1.0.0
 941      */
 942     function set_keyword_group_highlighting($key, $flag = true) {
 943         $this->lexic_permissions['KEYWORDS'][$key] = ($flag) ? true : false;
 944     }
 945
 946     /**
 947      * Sets the styles for comment groups.  If $preserve_defaults is
 948      * true, then styles are merged with the default styles, with the
 949      * user defined styles having priority
 950      *
 951      * @param int     The key of the comment group to change the styles of
 952      * @param string  The style to make the comments
 953      * @param boolean Whether to merge the new styles with the old or just
 954      *                to overwrite them
 955      * @since 1.0.0
 956      */
 957     function set_comments_style($key, $style, $preserve_defaults = false) {
 958         if (!$preserve_defaults) {
 959             $this->language_data['STYLES']['COMMENTS'][$key] = $style;
 960         } else {
 961             $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
 962         }
 963     }
 964
 965     /**
 966      * Turns highlighting on/off for comment groups
 967      *
 968      * @param int     The key of the comment group to turn on or off
 969      * @param boolean Whether to turn highlighting for that group on or off
 970      * @since 1.0.0
 971      */
 972     function set_comments_highlighting($key, $flag = true) {
 973         $this->lexic_permissions['COMMENTS'][$key] = ($flag) ? true : false;
 974     }
 975
 976     /**
 977      * Sets the styles for escaped characters. If $preserve_defaults is
 978      * true, then styles are merged with the default styles, with the
 979      * user defined styles having priority
 980      *
 981      * @param string  The style to make the escape characters
 982      * @param boolean Whether to merge the new styles with the old or just
 983      *                to overwrite them
 984      * @since 1.0.0
 985      */
 986     function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
 987         if (!$preserve_defaults) {
 988             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
 989         } else {
 990             $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
 991         }
 992     }
 993
 994     /**
 995      * Turns highlighting on/off for escaped characters
 996      *
 997      * @param boolean Whether to turn highlighting for escape characters on or off
 998      * @since 1.0.0
 999      */
1000     function set_escape_characters_highlighting($flag = true) {
1001         $this->lexic_permissions['ESCAPE_CHAR'] = ($flag) ? true : false;
1002     }
1003
1004     /**
1005      * Sets the styles for brackets. If $preserve_defaults is
1006      * true, then styles are merged with the default styles, with the
1007      * user defined styles having priority
1008      *
1009      * This method is DEPRECATED: use set_symbols_style instead.
1010      * This method will be removed in 1.2.X
1011      *
1012      * @param string  The style to make the brackets
1013      * @param boolean Whether to merge the new styles with the old or just
1014      *                to overwrite them
1015      * @since 1.0.0
1016      * @deprecated In favour of set_symbols_style
1017      */
1018     function set_brackets_style($style, $preserve_defaults = false) {
1019         if (!$preserve_defaults) {
1020             $this->language_data['STYLES']['BRACKETS'][0] = $style;
1021         } else {
1022             $this->language_data['STYLES']['BRACKETS'][0] .= $style;
1023         }
1024     }
1025
1026     /**
1027      * Turns highlighting on/off for brackets
1028      *
1029      * This method is DEPRECATED: use set_symbols_highlighting instead.
1030      * This method will be remove in 1.2.X
1031      *
1032      * @param boolean Whether to turn highlighting for brackets on or off
1033      * @since 1.0.0
1034      * @deprecated In favour of set_symbols_highlighting
1035      */
1036     function set_brackets_highlighting($flag) {
1037         $this->lexic_permissions['BRACKETS'] = ($flag) ? true : false;
1038     }
1039
1040     /**
1041      * Sets the styles for symbols. If $preserve_defaults is
1042      * true, then styles are merged with the default styles, with the
1043      * user defined styles having priority
1044      *
1045      * @param string  The style to make the symbols
1046      * @param boolean Whether to merge the new styles with the old or just
1047      *                to overwrite them
1048      * @param int     Tells the group of symbols for which style should be set.
1049      * @since 1.0.1
1050      */
1051     function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1052         // Update the style of symbols
1053         if (!$preserve_defaults) {
1054             $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1055         } else {
1056             $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1057         }
1058
1059         // For backward compatibility
1060         if (0 == $group) {
1061             $this->set_brackets_style ($style, $preserve_defaults);
1062         }
1063     }
1064
1065     /**
1066      * Turns highlighting on/off for symbols
1067      *
1068      * @param boolean Whether to turn highlighting for symbols on or off
1069      * @since 1.0.0
1070      */
1071     function set_symbols_highlighting($flag) {
1072         // Update lexic permissions for this symbol group
1073         $this->lexic_permissions['SYMBOLS'] = ($flag) ? true : false;
1074
1075         // For backward compatibility
1076         $this->set_brackets_highlighting ($flag);
1077     }
1078
1079     /**
1080      * Sets the styles for strings. If $preserve_defaults is
1081      * true, then styles are merged with the default styles, with the
1082      * user defined styles having priority
1083      *
1084      * @param string  The style to make the escape characters
1085      * @param boolean Whether to merge the new styles with the old or just
1086      *                to overwrite them
1087      * @since 1.0.0
1088      */
1089     function set_strings_style($style, $preserve_defaults = false) {
1090         if (!$preserve_defaults) {
1091             $this->language_data['STYLES']['STRINGS'][0] = $style;
1092         } else {
1093             $this->language_data['STYLES']['STRINGS'][0] .= $style;
1094         }
1095     }
1096
1097     /**
1098      * Turns highlighting on/off for strings
1099      *
1100      * @param boolean Whether to turn highlighting for strings on or off
1101      * @since 1.0.0
1102      */
1103     function set_strings_highlighting($flag) {
1104         $this->lexic_permissions['STRINGS'] = ($flag) ? true : false;
1105     }
1106
1107     /**
1108      * Sets the styles for strict code blocks. If $preserve_defaults is
1109      * true, then styles are merged with the default styles, with the
1110      * user defined styles having priority
1111      *
1112      * @param string  The style to make the script blocks
1113      * @param boolean Whether to merge the new styles with the old or just
1114      *                to overwrite them
1115      * @param int     Tells the group of script blocks for which style should be set.
1116      * @since 1.0.8.4
1117      */
1118     function set_script_style($style, $preserve_defaults = false, $group = 0) {
1119         // Update the style of symbols
1120         if (!$preserve_defaults) {
1121             $this->language_data['STYLES']['SCRIPT'][$group] = $style;
1122         } else {
1123             $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
1124         }
1125     }
1126
1127     /**
1128      * Sets the styles for numbers. If $preserve_defaults is
1129      * true, then styles are merged with the default styles, with the
1130      * user defined styles having priority
1131      *
1132      * @param string  The style to make the numbers
1133      * @param boolean Whether to merge the new styles with the old or just
1134      *                to overwrite them
1135      * @since 1.0.0
1136      */
1137     function set_numbers_style($style, $preserve_defaults = false) {
1138         if (!$preserve_defaults) {
1139             $this->language_data['STYLES']['NUMBERS'][0] = $style;
1140         } else {
1141             $this->language_data['STYLES']['NUMBERS'][0] .= $style;
1142         }
1143     }
1144
1145     /**
1146      * Turns highlighting on/off for numbers
1147      *
1148      * @param boolean Whether to turn highlighting for numbers on or off
1149      * @since 1.0.0
1150      */
1151     function set_numbers_highlighting($flag) {
1152         $this->lexic_permissions['NUMBERS'] = ($flag) ? true : false;
1153     }
1154
1155     /**
1156      * Sets the styles for methods. $key is a number that references the
1157      * appropriate "object splitter" - see the language file for the language
1158      * you are highlighting to get this number. If $preserve_defaults is
1159      * true, then styles are merged with the default styles, with the
1160      * user defined styles having priority
1161      *
1162      * @param int     The key of the object splitter to change the styles of
1163      * @param string  The style to make the methods
1164      * @param boolean Whether to merge the new styles with the old or just
1165      *                to overwrite them
1166      * @since 1.0.0
1167      */
1168     function set_methods_style($key, $style, $preserve_defaults = false) {
1169         if (!$preserve_defaults) {
1170             $this->language_data['STYLES']['METHODS'][$key] = $style;
1171         } else {
1172             $this->language_data['STYLES']['METHODS'][$key] .= $style;
1173         }
1174     }
1175
1176     /**
1177      * Turns highlighting on/off for methods
1178      *
1179      * @param boolean Whether to turn highlighting for methods on or off
1180      * @since 1.0.0
1181      */
1182     function set_methods_highlighting($flag) {
1183         $this->lexic_permissions['METHODS'] = ($flag) ? true : false;
1184     }
1185
1186     /**
1187      * Sets the styles for regexps. If $preserve_defaults is
1188      * true, then styles are merged with the default styles, with the
1189      * user defined styles having priority
1190      *
1191      * @param string  The style to make the regular expression matches
1192      * @param boolean Whether to merge the new styles with the old or just
1193      *                to overwrite them
1194      * @since 1.0.0
1195      */
1196     function set_regexps_style($key, $style, $preserve_defaults = false) {
1197         if (!$preserve_defaults) {
1198             $this->language_data['STYLES']['REGEXPS'][$key] = $style;
1199         } else {
1200             $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
1201         }
1202     }
1203
1204     /**
1205      * Turns highlighting on/off for regexps
1206      *
1207      * @param int     The key of the regular expression group to turn on or off
1208      * @param boolean Whether to turn highlighting for the regular expression group on or off
1209      * @since 1.0.0
1210      */
1211     function set_regexps_highlighting($key, $flag) {
1212         $this->lexic_permissions['REGEXPS'][$key] = ($flag) ? true : false;
1213     }
1214
1215     /**
1216      * Sets whether a set of keywords are checked for in a case sensitive manner
1217      *
1218      * @param int The key of the keyword group to change the case sensitivity of
1219      * @param boolean Whether to check in a case sensitive manner or not
1220      * @since 1.0.0
1221      */
1222     function set_case_sensitivity($key, $case) {
1223         $this->language_data['CASE_SENSITIVE'][$key] = ($case) ? true : false;
1224     }
1225
1226     /**
1227      * Sets the case that keywords should use when found. Use the constants:
1228      *
1229      *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
1230      *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
1231      *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
1232      *
1233      * @param int A constant specifying what to do with matched keywords
1234      * @since 1.0.1
1235      */
1236     function set_case_keywords($case) {
1237         if (in_array($case, array(
1238             GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER))) {
1239             $this->language_data['CASE_KEYWORDS'] = $case;
1240         }
1241     }
1242
1243     /**
1244      * Sets how many spaces a tab is substituted for
1245      *
1246      * Widths below zero are ignored
1247      *
1248      * @param int The tab width
1249      * @since 1.0.0
1250      */
1251     function set_tab_width($width) {
1252         $this->tab_width = intval($width);
1253
1254         //Check if it fit's the constraints:
1255         if ($this->tab_width < 1) {
1256             //Return it to the default
1257             $this->tab_width = 8;
1258         }
1259     }
1260
1261     /**
1262      * Sets whether or not to use tab-stop width specifed by language
1263      *
1264      * @param boolean Whether to use language-specific tab-stop widths
1265      * @since 1.0.7.20
1266      */
1267     function set_use_language_tab_width($use) {
1268         $this->use_language_tab_width = (bool) $use;
1269     }
1270
1271     /**
1272      * Returns the tab width to use, based on the current language and user
1273      * preference
1274      *
1275      * @return int Tab width
1276      * @since 1.0.7.20
1277      */
1278     function get_real_tab_width() {
1279         if (!$this->use_language_tab_width ||
1280             !isset($this->language_data['TAB_WIDTH'])) {
1281             return $this->tab_width;
1282         } else {
1283             return $this->language_data['TAB_WIDTH'];
1284         }
1285     }
1286
1287     /**
1288      * Enables/disables strict highlighting. Default is off, calling this
1289      * method without parameters will turn it on. See documentation
1290      * for more details on strict mode and where to use it.
1291      *
1292      * @param boolean Whether to enable strict mode or not
1293      * @since 1.0.0
1294      */
1295     function enable_strict_mode($mode = true) {
1296         if (GESHI_MAYBE == $this->language_data['STRICT_MODE_APPLIES']) {
1297             $this->strict_mode = ($mode) ? GESHI_ALWAYS : GESHI_NEVER;
1298         }
1299     }
1300
1301     /**
1302      * Disables all highlighting
1303      *
1304      * @since 1.0.0
1305      * @todo  Rewrite with array traversal
1306      * @deprecated In favour of enable_highlighting
1307      */
1308     function disable_highlighting() {
1309         $this->enable_highlighting(false);
1310     }
1311
1312     /**
1313      * Enables all highlighting
1314      *
1315      * The optional flag parameter was added in version 1.0.7.21 and can be used
1316      * to enable (true) or disable (false) all highlighting.
1317      *
1318      * @since 1.0.0
1319      * @param boolean A flag specifying whether to enable or disable all highlighting
1320      * @todo  Rewrite with array traversal
1321      */
1322     function enable_highlighting($flag = true) {
1323         $flag = $flag ? true : false;
1324         foreach ($this->lexic_permissions as $key => $value) {
1325             if (is_array($value)) {
1326                 foreach ($value as $k => $v) {
1327                     $this->lexic_permissions[$key][$k] = $flag;
1328                 }
1329             } else {
1330                 $this->lexic_permissions[$key] = $flag;
1331             }
1332         }
1333
1334         // Context blocks
1335         $this->enable_important_blocks = $flag;
1336     }
1337
1338     /**
1339      * Given a file extension, this method returns either a valid geshi language
1340      * name, or the empty string if it couldn't be found
1341      *
1342      * @param string The extension to get a language name for
1343      * @param array  A lookup array to use instead of the default one
1344      * @since 1.0.5
1345      * @todo Re-think about how this method works (maybe make it private and/or make it
1346      *       a extension->lang lookup?)
1347      * @todo static?
1348      */
1349     function get_language_name_from_extension( $extension, $lookup = array() ) {
1350         if ( !is_array($lookup) || empty($lookup)) {
1351             $lookup = array(
1352                 'abap' => array('abap'),
1353                 'actionscript' => array('as'),
1354                 'ada' => array('a', 'ada', 'adb', 'ads'),
1355                 'apache' => array('conf'),
1356                 'asm' => array('ash', 'asm', 'inc'),
1357                 'asp' => array('asp'),
1358                 'bash' => array('sh'),
1359                 'bf' => array('bf'),
1360                 'c' => array('c', 'h'),
1361                 'c_mac' => array('c', 'h'),
1362                 'caddcl' => array(),
1363                 'cadlisp' => array(),
1364                 'cdfg' => array('cdfg'),
1365                 'cobol' => array('cbl'),
1366                 'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
1367                 'csharp' => array('cs'),
1368                 'css' => array('css'),
1369                 'd' => array('d'),
1370                 'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
1371                 'diff' => array('diff', 'patch'),
1372                 'dos' => array('bat', 'cmd'),
1373                 'gettext' => array('po', 'pot'),
1374                 'gml' => array('gml'),
1375                 'gnuplot' => array('plt'),
1376                 'groovy' => array('groovy'),
1377                 'haskell' => array('hs'),
1378                 'html4strict' => array('html', 'htm'),
1379                 'ini' => array('ini', 'desktop'),
1380                 'java' => array('java'),
1381                 'javascript' => array('js'),
1382                 'klonec' => array('kl1'),
1383                 'klonecpp' => array('klx'),
1384                 'latex' => array('tex'),
1385                 'lisp' => array('lisp'),
1386                 'lua' => array('lua'),
1387                 'matlab' => array('m'),
1388                 'mpasm' => array(),
1389                 'mysql' => array('sql'),
1390                 'nsis' => array(),
1391                 'objc' => array(),
1392                 'oobas' => array(),
1393                 'oracle8' => array(),
1394                 'oracle10' => array(),
1395                 'pascal' => array('pas'),
1396                 'perl' => array('pl', 'pm'),
1397                 'php' => array('php', 'php5', 'phtml', 'phps'),
1398                 'povray' => array('pov'),
1399                 'providex' => array('pvc', 'pvx'),
1400                 'prolog' => array('pl'),
1401                 'python' => array('py'),
1402                 'qbasic' => array('bi'),
1403                 'reg' => array('reg'),
1404                 'ruby' => array('rb'),
1405                 'sas' => array('sas'),
1406                 'scala' => array('scala'),
1407                 'scheme' => array('scm'),
1408                 'scilab' => array('sci'),
1409                 'smalltalk' => array('st'),
1410                 'smarty' => array(),
1411                 'tcl' => array('tcl'),
1412                 'vb' => array('bas'),
1413                 'vbnet' => array(),
1414                 'visualfoxpro' => array(),
1415                 'whitespace' => array('ws'),
1416                 'xml' => array('xml', 'svg', 'xrc'),
1417                 'z80' => array('z80', 'asm', 'inc')
1418             );
1419         }
1420
1421         foreach ($lookup as $lang => $extensions) {
1422             if (in_array($extension, $extensions)) {
1423                 return $lang;
1424             }
1425         }
1426         return '';
1427     }
1428
1429     /**
1430      * Given a file name, this method loads its contents in, and attempts
1431      * to set the language automatically. An optional lookup table can be
1432      * passed for looking up the language name. If not specified a default
1433      * table is used
1434      *
1435      * The language table is in the form
1436      * <pre>array(
1437      *   'lang_name' => array('extension', 'extension', ...),
1438      *   'lang_name' ...
1439      * );</pre>
1440      *
1441      * @param string The filename to load the source from
1442      * @param array  A lookup array to use instead of the default one
1443      * @todo Complete rethink of this and above method
1444      * @since 1.0.5
1445      */
1446     function load_from_file($file_name, $lookup = array()) {
1447         if (is_readable($file_name)) {
1448             $this->set_source(file_get_contents($file_name));
1449             $this->set_language($this->get_language_name_from_extension(substr(strrchr($file_name, '.'), 1), $lookup));
1450         } else {
1451             $this->error = GESHI_ERROR_FILE_NOT_READABLE;
1452         }
1453     }
1454
1455     /**
1456      * Adds a keyword to a keyword group for highlighting
1457      *
1458      * @param int    The key of the keyword group to add the keyword to
1459      * @param string The word to add to the keyword group
1460      * @since 1.0.0
1461      */
1462     function add_keyword($key, $word) {
1463         if (!in_array($word, $this->language_data['KEYWORDS'][$key])) {
1464             $this->language_data['KEYWORDS'][$key][] = $word;
1465
1466             //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
1467             if ($this->parse_cache_built) {
1468                 $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
1469                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
1470             }
1471         }
1472     }
1473
1474     /**
1475      * Removes a keyword from a keyword group
1476      *
1477      * @param int    The key of the keyword group to remove the keyword from
1478      * @param string The word to remove from the keyword group
1479      * @param bool   Wether to automatically recompile the optimized regexp list or not.
1480      *               Note: if you set this to false and @see GeSHi->parse_code() was already called once,
1481      *               for the current language, you have to manually call @see GeSHi->optimize_keyword_group()
1482      *               or the removed keyword will stay in cache and still be highlighted! On the other hand
1483      *               it might be too expensive to recompile the regexp list for every removal if you want to
1484      *               remove a lot of keywords.
1485      * @since 1.0.0
1486      */
1487     function remove_keyword($key, $word, $recompile = true) {
1488         $key_to_remove = array_search($word, $this->language_data['KEYWORDS'][$key]);
1489         if ($key_to_remove !== false) {
1490             unset($this->language_data['KEYWORDS'][$key][$key_to_remove]);
1491
1492             //NEW in 1.0.8, optionally recompile keyword group
1493             if ($recompile && $this->parse_cache_built) {
1494                 $this->optimize_keyword_group($key);
1495             }
1496         }
1497     }
1498
1499     /**
1500      * Creates a new keyword group
1501      *
1502      * @param int    The key of the keyword group to create
1503      * @param string The styles for the keyword group
1504      * @param boolean Whether the keyword group is case sensitive ornot
1505      * @param array  The words to use for the keyword group
1506      * @since 1.0.0
1507      */
1508     function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
1509         $words = (array) $words;
1510         if  (empty($words)) {
1511             // empty word lists mess up highlighting
1512             return false;
1513         }
1514
1515         //Add the new keyword group internally
1516         $this->language_data['KEYWORDS'][$key] = $words;
1517         $this->lexic_permissions['KEYWORDS'][$key] = true;
1518         $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
1519         $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;
1520
1521         //NEW in 1.0.8, cache keyword regexp
1522         if ($this->parse_cache_built) {
1523             $this->optimize_keyword_group($key);
1524         }
1525     }
1526
1527     /**
1528      * Removes a keyword group
1529      *
1530      * @param int    The key of the keyword group to remove
1531      * @since 1.0.0
1532      */
1533     function remove_keyword_group ($key) {
1534         //Remove the keyword group internally
1535         unset($this->language_data['KEYWORDS'][$key]);
1536         unset($this->lexic_permissions['KEYWORDS'][$key]);
1537         unset($this->language_data['CASE_SENSITIVE'][$key]);
1538         unset($this->language_data['STYLES']['KEYWORDS'][$key]);
1539
1540         //NEW in 1.0.8
1541         unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
1542     }
1543
1544     /**
1545      * compile optimized regexp list for keyword group
1546      *
1547      * @param int   The key of the keyword group to compile & optimize
1548      * @since 1.0.8
1549      */
1550     function optimize_keyword_group($key) {
1551         $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
1552             $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);
1553         $space_as_whitespace = false;
1554         if(isset($this->language_data['PARSER_CONTROL'])) {
1555             if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
1556                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
1557                     $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
1558                 }
1559                 if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1560                     if(isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
1561                         $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
1562                     }
1563                 }
1564             }
1565         }
1566         if($space_as_whitespace) {
1567             foreach($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
1568                 $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
1569                     str_replace(" ", "\\s+", $rxv);
1570             }
1571         }
1572     }
1573
1574     /**
1575      * Sets the content of the header block
1576      *
1577      * @param string The content of the header block
1578      * @since 1.0.2
1579      */
1580     function set_header_content($content) {
1581         $this->header_content = $content;
1582     }
1583
1584     /**
1585      * Sets the content of the footer block
1586      *
1587      * @param string The content of the footer block
1588      * @since 1.0.2
1589      */
1590     function set_footer_content($content) {
1591         $this->footer_content = $content;
1592     }
1593
1594     /**
1595      * Sets the style for the header content
1596      *
1597      * @param string The style for the header content
1598      * @since 1.0.2
1599      */
1600     function set_header_content_style($style) {
1601         $this->header_content_style = $style;
1602     }
1603
1604     /**
1605      * Sets the style for the footer content
1606      *
1607      * @param string The style for the footer content
1608      * @since 1.0.2
1609      */
1610     function set_footer_content_style($style) {
1611         $this->footer_content_style = $style;
1612     }
1613
1614     /**
1615      * Sets whether to force a surrounding block around
1616      * the highlighted code or not
1617      *
1618      * @param boolean Tells whether to enable or disable this feature
1619      * @since 1.0.7.20
1620      */
1621     function enable_inner_code_block($flag) {
1622         $this->force_code_block = (bool)$flag;
1623     }
1624
/**
 * Registers the base URL to be used for the keywords of one group.
 *
 * @param int    $group The key of the keyword group to set the URL for
 * @param string $url   The URL to set for the group. If {FNAME} is in
 *                      the url somewhere, it is replaced by the keyword
 *                      that the URL is being made for
 * @since 1.0.2
 */
function set_url_for_keyword_group($group, $url) {
    // Written straight into the language data, replacing any URL the
    // group already had.
    $this->language_data['URLS'][$group] = $url;
}
1637
/**
 * Registers the styles used for links in code, per link state.
 *
 * @param int    $type   A constant that specifies what state the style is
 *                       being set for - e.g. :hover or :visited
 * @param string $styles The styles to use for that state
 * @since 1.0.2
 */
function set_link_styles($type, $styles) {
    // One entry per link state; an existing entry for $type is overwritten.
    $this->link_styles[$type] = $styles;
}
1649
/**
 * Sets the target for links in code.
 *
 * What gets stored is the complete attribute text (leading space
 * included), or an empty string when no target is wanted. The target
 * is inserted as given, without any escaping.
 *
 * @param string $target The target for links in the code, e.g. _blank.
 *                       Any falsy value (including '' and '0') clears
 *                       the target.
 * @since 1.0.3
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
1663
/**
 * Stores the styles used for important parts of the code.
 *
 * @param string $styles The styles to use on important parts of the code
 * @since 1.0.2
 */
function set_important_styles($styles) {
    // Kept verbatim in important_styles.
    $this->important_styles = $styles;
}
1673
/**
 * Switches highlighting of context-important blocks on or off.
 *
 * @param boolean $flag Tells whether to enable or disable highlighting of
 *                      important blocks; reduced to a strict boolean
 * @todo Remove this feature from GeSHi
 * @deprecated
 * @since 1.0.2
 */
function enable_important_blocks($flag) {
    // The property deliberately shares its name with this method.
    $this->enable_important_blocks = (bool)$flag;
}
1685
/**
 * Chooses whether CSS IDs should be added to each line.
 *
 * @param boolean $flag If true (the default), IDs will be added to each
 *                      line; reduced to a strict boolean
 * @since 1.0.2
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool)$flag;
}
1695
/**
 * Specifies which lines to highlight extra.
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * An array argument is processed one element at a time by calling this
 * method again for each element, so nested arrays are handled as well.
 * Every single value is converted with intval() before use.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a
 *                      line number on its own.
 * @param string $style A string specifying the style to use for this line.
 *                      If null is specified, the default style is used.
 *                      If false is specified, the line will be removed
 *                      from special highlighting
 * @since 1.0.2
 * @todo  Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    // A list of lines: hand every entry to this method on its own.
    if (is_array($lines)) {
        foreach ($lines as $entry) {
            $this->highlight_lines_extra($entry, $style);
        }
        return;
    }

    // A single line: register it first, then settle its style.
    $line_number = intval($lines);
    $this->highlight_extra_lines[$line_number] = $line_number;

    if ($style === false) {
        // false removes the line from special highlighting altogether
        unset(
            $this->highlight_extra_lines[$line_number],
            $this->highlight_extra_lines_styles[$line_number]
        );
    } elseif ($style === null) {
        // null drops any per-line style, leaving the default one
        unset($this->highlight_extra_lines_styles[$line_number]);
    } else {
        $this->highlight_extra_lines_styles[$line_number] = $style;
    }
}
1732
/**
 * Stores the style used for extra-highlighted lines.
 *
 * @param string $styles The style for extra-highlighted lines
 * @since 1.0.2
 */
function set_highlight_lines_extra_style($styles) {
    // Kept verbatim in highlight_extra_lines_style.
    $this->highlight_extra_lines_style = $styles;
}
1742
/**
 * Sets the line-ending.
 *
 * @param string $line_ending The new line-ending; it is converted to a
 *                            string before being stored
 * @since 1.0.2
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
1752
/**
 * Sets what number line numbers should start at. The argument is
 * converted with intval() and its sign is dropped with abs(), so a
 * negative value ends up as its positive counterpart.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the &lt;ol&gt; that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int $number The number to start line numbers at
 * @since 1.0.2
 */
function start_line_numbers_at($number) {
    $first_line = intval($number);
    $this->line_numbers_start = abs($first_line);
}
1771
/**
 * Sets the encoding used for htmlspecialchars(), for international
 * support. The name is stored lower-cased; a falsy argument leaves the
 * current encoding untouched.
 *
 * NOTE: This is not needed for now because htmlspecialchars() is not
 * being used (it has a security hole in PHP4 that has not been patched).
 * Maybe in a future version it may make a return for speed reasons, but
 * I doubt it.
 *
 * @param string $encoding The encoding to use for the source
 * @since 1.0.3
 */
function set_encoding($encoding) {
    // Nothing to do for an empty/falsy encoding name.
    if (!$encoding) {
        return;
    }

    $this->encoding = strtolower($encoding);
}
1789
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean $enable If true (the default), links will be added to
 *                        keywords; reduced to a strict boolean
 * @since 1.0.2
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
1799
/**
 * Setup caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed
 *
 * Only number highlighting is handled here. When the NUMBERS lexic
 * permission is on, language_data['NUMBERS_CACHE'] is (re)built:
 *  - if language_data['NUMBERS'] is an array it is copied as is;
 *  - otherwise it is read as a bitfield (defaulting to
 *    GESHI_NUMBER_INT_BASIC | GESHI_NUMBER_FLT_NONSCI when unset or
 *    zero). Styles keyed by flag value (1 << n) are moved to key n, and
 *    each set bit either gets its own cache entry (when a style exists
 *    under key n) or is OR-ed into entry 0.
 *
 * NOTE(review): the style re-keying modifies language_data['STYLES']
 * in place, so running this twice on the same data does not look
 * idempotent; build_parse_cache() only calls it while NUMBERS_CACHE is
 * unset.
 *
 * @since 1.0.8
 * @access private
 */
function build_style_cache() {
    // Nothing to prepare while number highlighting is switched off
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    // A language file without number information behaves like "0"
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    // An array is taken over unchanged as the cache
    if (is_array($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    // From here on NUMBERS is a bitfield of number-format flags
    $this->language_data['NUMBERS_CACHE'] = array();
    if (!$this->language_data['NUMBERS']) {
        $this->language_data['NUMBERS'] =
            GESHI_NUMBER_INT_BASIC |
            GESHI_NUMBER_FLT_NONSCI;
    }

    // Walk the bitfield from the lowest bit upwards
    $bit_index = 0;
    $remaining = $this->language_data['NUMBERS'];
    while ($remaining > 0) {
        $flag_value = 1 << $bit_index;

        // A style stored under the flag value moves to the bit index
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag_value])) {
            $this->language_data['STYLES']['NUMBERS'][$bit_index] =
                $this->language_data['STYLES']['NUMBERS'][$flag_value];
            unset($this->language_data['STYLES']['NUMBERS'][$flag_value]);
        }

        if ($remaining & 1) {
            if (isset($this->language_data['STYLES']['NUMBERS'][$bit_index])) {
                // This format has a style group of its own
                $this->language_data['NUMBERS_CACHE'][$bit_index] = $flag_value;
            } else {
                // No dedicated style: collect the flag in group 0
                if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                    $this->language_data['NUMBERS_CACHE'][0] = 0;
                }
                $this->language_data['NUMBERS_CACHE'][0] |= $flag_value;
            }
        }

        ++$bit_index;
        $remaining >>= 1;
    }
}
1852
/**
 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
 * This function makes stylesheet generators much faster as they do not need these caches.
 *
 * The following entries of $this->language_data are (re)built:
 *  - MULTIPLE_SYMBOL_GROUPS, SYMBOL_DATA, SYMBOL_SEARCH (only when the
 *    SYMBOLS lexic permission is on and symbols are defined)
 *  - CACHED_KEYWORD_LISTS (reset here, then filled through
 *    optimize_keyword_group() for every enabled keyword group)
 *  - CACHE_BRACKET_MATCH, CACHE_BRACKET_REPLACE (BRACKETS permission)
 *  - NUMBERS_RXCACHE (NUMBERS permission)
 * Finally $this->parse_cache_built is set to true.
 *
 * @since 1.0.8
 * @access private
 */
function build_parse_cache() {
    // cache symbol regexp
    //As this is a costly operation, we avoid doing it for multiple groups ...
    //Instead we perform it for all symbols at once.
    //
    //For this to work, we need to reorganize the data arrays.
    if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
        $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;

        $this->language_data['SYMBOL_DATA'] = array();
        $symbol_preg_multi = array(); // multi char symbols
        $symbol_preg_single = array(); // single char symbols
        foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
            // A nested array is a symbol group of its own (keyed by $key);
            // a bare symbol always belongs to group 0.
            if (is_array($symbols)) {
                $group = $key;
                $symbol_list = $symbols;
            } else {
                $group = 0;
                $symbol_list = array($symbols);
            }

            foreach ($symbol_list as $sym) {
                // Symbols are matched in their escaped form
                $sym = $this->hsc($sym);
                if (isset($this->language_data['SYMBOL_DATA'][$sym])) {
                    // first definition of a symbol wins
                    continue;
                }
                $this->language_data['SYMBOL_DATA'][$sym] = $group;

                if (isset($sym[1])) { // multiple chars
                    $symbol_preg_multi[] = preg_quote($sym, '/');
                } else if ($sym == '-') {
                    // don't trigger range out of order error
                    $symbol_preg_single[] = '\-';
                } else { // single char
                    $symbol_preg_single[] = preg_quote($sym, '/');
                }
            }
        }

        //Now we have an array with each possible symbol as the key and the style as the actual data.
        //This way we can set the correct style just the moment we highlight ...
        //
        //Now we need to rewrite our array to get a search string that
        //matches all symbols at once: an alternation of the multi char
        //symbols followed by one character class for the single char ones.
        $symbol_preg = array();
        if (!empty($symbol_preg_multi)) {
            // reverse order puts a longer symbol ahead of any symbol
            // that is a prefix of it
            rsort($symbol_preg_multi);
            $symbol_preg[] = implode('|', $symbol_preg_multi);
        }
        if (!empty($symbol_preg_single)) {
            rsort($symbol_preg_single);
            $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
        }
        $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
    }

    // cache optimized regexp for keyword matching
    // remove old cache
    $this->language_data['CACHED_KEYWORD_LISTS'] = array();
    foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
        // a group without an explicit permission entry counts as enabled
        if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
                $this->lexic_permissions['KEYWORDS'][$key]) {
            $this->optimize_keyword_group($key);
        }
    }

    // brackets
    if ($this->lexic_permissions['BRACKETS']) {
        $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');

        // Inline style when classes are off and a bracket style exists,
        // the br0 class in every other case.
        if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
            $bracket_attribute = ' style="' . $this->language_data['STYLES']['BRACKETS'][0] . '"';
        } else {
            $bracket_attribute = ' class="br0"';
        }

        // Numeric entities in the same order as CACHE_BRACKET_MATCH
        $bracket_entities = array('&#91;', '&#93;', '&#40;', '&#41;', '&#123;', '&#125;');
        $bracket_replacements = array();
        foreach ($bracket_entities as $entity) {
            $bracket_replacements[] = '<|' . $bracket_attribute . '>' . $entity . '|>';
        }
        $this->language_data['CACHE_BRACKET_REPLACE'] = $bracket_replacements;
    }

    //Build the parse cache needed to highlight numbers appropriate
    if($this->lexic_permissions['NUMBERS']) {
        //Check if the style rearrangements have been processed ...
        //This also does some preprocessing to check which style groups are useable ...
        if(!isset($this->language_data['NUMBERS_CACHE'])) {
            $this->build_style_cache();
        }

        //Number format specification
        //All these formats are matched case-insensitively!
        static $numbers_format = array(
            GESHI_NUMBER_INT_BASIC =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?!(?m:$)))',
            GESHI_NUMBER_INT_CSTYLE =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?b(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_PERCENT =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_BIN_PREFIX_0B =>
                '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z\.])',
            GESHI_NUMBER_OCT_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_PREFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-f]+?(?![0-9a-z\.])',
            GESHI_NUMBER_HEX_SUFFIX =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-f]*?h(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_NONSCI_F =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_SHORT =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z\.])',
            GESHI_NUMBER_FLT_SCI_ZERO =>
                '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z\.])'
            );

        //At this step we have an associative array with flag groups for a
        //specific style or a string denoting a regexp given its index.
        $this->language_data['NUMBERS_RXCACHE'] = array();
        foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
            if(is_string($rxdata)) {
                $regexp = $rxdata;
            } else {
                //This is a bitfield of number flags to highlight:
                //Build an array, implode them together and make this the actual RX
                $rxuse = array();
                for($i = 1; $i <= $rxdata; $i<<=1) {
                    //Bits without an entry in the format table are skipped:
                    //using them would raise an undefined index notice and
                    //add an empty alternative to the regexp.
                    if(($rxdata & $i) && isset($numbers_format[$i])) {
                        $rxuse[] = $numbers_format[$i];
                    }
                }
                $regexp = implode("|", $rxuse);
            }

            //NOTE(review): the surrounding lookarounds presumably keep the
            //pattern from matching inside markers (<| ... |>) that earlier
            //parsing steps inserted - confirm against the code using them.
            $this->language_data['NUMBERS_RXCACHE'][$key] =
                "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!\|>)(?![^\"\|\>\<]+<)/i";
        }
    }

    $this->parse_cache_built = true;
}
2021
2022     /**
2023      * Returns the code in $this->source, highlighted and surrounded by the
2024      * nessecary HTML.
2025      *
2026      * This should only be called ONCE, cos it's SLOW! If you want to highlight
2027      * the same source multiple times, you're better off doing a whole lot of
2028      * str_replaces to replace the &lt;span&gt;s
2029      *
2030      * @since 1.0.0
2031      */
2032     function parse_code () {
2033         // Start the timer
2034         $start_time = microtime();
2035
2036         // Firstly, if there is an error, we won't highlight
2037         if ($this->error) {
2038             //Escape the source for output
2039             $result = $this->hsc($this->source);
2040
2041             //This fix is related to SF#1923020, but has to be applied regardless of
2042             //actually highlighting symbols.
2043             $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2044
2045             // Timing is irrelevant
2046             $this->set_time($start_time, $start_time);
2047             $this->finalise($result);
2048             return $result;
2049         }
2050
2051         // make sure the parse cache is up2date
2052         if (!$this->parse_cache_built) {
2053             $this->build_parse_cache();
2054         }
2055
2056         // Replace all newlines to a common form.
2057         $code = str_replace("\r\n", "\n", $this->source);
2058         $code = str_replace("\r", "\n", $code);
2059
2060         // Add spaces for regular expression matching and line numbers
2061 //        $code = "\n" . $code . "\n";
2062
2063         // Initialise various stuff
2064         $length           = strlen($code);
2065         $COMMENT_MATCHED  = false;
2066         $stuff_to_parse   = '';
2067         $endresult        = '';
2068
2069         // "Important" selections are handled like multiline comments
2070         // @todo GET RID OF THIS SHIZ
2071         if ($this->enable_important_blocks) {
2072             $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2073         }
2074
2075         if ($this->strict_mode) {
2076             // Break the source into bits. Each bit will be a portion of the code
2077             // within script delimiters - for example, HTML between < and >
2078             $k = 0;
2079             $parts = array();
2080             $matches = array();
2081             $next_match_pointer = null;
2082             // we use a copy to unset delimiters on demand (when they are not found)
2083             $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2084             $i = 0;
2085             while ($i < $length) {
2086                 $next_match_pos = $length + 1; // never true
2087                 foreach ($delim_copy as $dk => $delimiters) {
2088                     if(is_array($delimiters)) {
2089                         foreach ($delimiters as $open => $close) {
2090                             // make sure the cache is setup properly
2091                             if (!isset($matches[$dk][$open])) {
2092                                 $matches[$dk][$open] = array(
2093                                     'next_match' => -1,
2094                                     'dk' => $dk,
2095
2096                                     'open' => $open, // needed for grouping of adjacent code blocks (see below)
2097                                     'open_strlen' => strlen($open),
2098
2099                                     'close' => $close,
2100                                     'close_strlen' => strlen($close),
2101                                 );
2102                             }
2103                             // Get the next little bit for this opening string
2104                             if ($matches[$dk][$open]['next_match'] < $i) {
2105                                 // only find the next pos if it was not already cached
2106                                 $open_pos = strpos($code, $open, $i);
2107                                 if ($open_pos === false) {
2108                                     // no match for this delimiter ever
2109                                     unset($delim_copy[$dk][$open]);
2110                                     continue;
2111                                 }
2112                                 $matches[$dk][$open]['next_match'] = $open_pos;
2113                             }
2114                             if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2115                                 //So we got a new match, update the close_pos
2116                                 $matches[$dk][$open]['close_pos'] =
2117                                     strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2118
2119                                 $next_match_pointer =& $matches[$dk][$open];
2120                                 $next_match_pos = $matches[$dk][$open]['next_match'];
2121                             }
2122                         }
2123                     } else {
2124                         //So we should match an RegExp as Strict Block ...
2125                         /**
2126                          * The value in $delimiters is expected to be an RegExp
2127                          * containing exactly 2 matching groups:
2128                          *  - Group 1 is the opener
2129                          *  - Group 2 is the closer
2130                          */
2131                         if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2132                             preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2133                             //We got a match ...
2134                             if(isset($matches_rx['start']) && isset($matches_rx['end']))
2135                             {
2136                                 $matches[$dk] = array(
2137                                     'next_match' => $matches_rx['start'][1],
2138                                     'dk' => $dk,
2139
2140                                     'close_strlen' => strlen($matches_rx['end'][0]),
2141                                     'close_pos' => $matches_rx['end'][1],
2142                                     );
2143                             } else {
2144                                 $matches[$dk] = array(
2145                                     'next_match' => $matches_rx[1][1],
2146                                     'dk' => $dk,
2147
2148                                     'close_strlen' => strlen($matches_rx[2][0]),
2149                                     'close_pos' => $matches_rx[2][1],
2150                                     );
2151                             }
2152                         } else {
2153                             // no match for this delimiter ever
2154                             unset($delim_copy[$dk]);
2155                             continue;
2156                         }
2157
2158                         if ($matches[$dk]['next_match'] <= $next_match_pos) {
2159                             $next_match_pointer =& $matches[$dk];
2160                             $next_match_pos = $matches[$dk]['next_match'];
2161                         }
2162                     }
2163                 }
2164
2165                 // non-highlightable text
2166                 $parts[$k] = array(
2167                     1 => substr($code, $i, $next_match_pos - $i)
2168                 );
2169                 ++$k;
2170
2171                 if ($next_match_pos > $length) {
2172                     // out of bounds means no next match was found
2173                     break;
2174                 }
2175
2176                 // highlightable code
2177                 $parts[$k][0] = $next_match_pointer['dk'];
2178
2179                 //Only combine for non-rx script blocks
2180                 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2181                     // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2182                     $i = $next_match_pos + $next_match_pointer['open_strlen'];
2183                     while (true) {
2184                         $close_pos = strpos($code, $next_match_pointer['close'], $i);
2185                         if ($close_pos == false) {
2186                             break;
2187                         }
2188                         $i = $close_pos + $next_match_pointer['close_strlen'];
2189                         if ($i == $length) {
2190                             break;
2191                         }
2192                         if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2193                             substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2194                             // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2195                             foreach ($matches as $submatches) {
2196                                 foreach ($submatches as $match) {
2197                                     if ($match['next_match'] == $i) {
2198                                         // a different block already matches here!
2199                                         break 3;
2200                                     }
2201                                 }
2202                             }
2203                         } else {
2204                             break;
2205                         }
2206                     }
2207                 } else {
2208                     $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2209                     $i = $close_pos;
2210                 }
2211
2212                 if ($close_pos === false) {
2213                     // no closing delimiter found!
2214                     $parts[$k][1] = substr($code, $next_match_pos);
2215                     ++$k;
2216                     break;
2217                 } else {
2218                     $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2219                     ++$k;
2220                 }
2221             }
2222             unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2223             $num_parts = $k;
2224
2225             if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2226                 // when we have only one part, we don't have anything to highlight at all.
2227                 // if we have a "maybe" strict language, this should be handled as highlightable code
2228                 $parts = array(
2229                     0 => array(
2230                         0 => '',
2231                         1 => ''
2232                     ),
2233                     1 => array(
2234                         0 => null,
2235                         1 => $parts[0][1]
2236                     )
2237                 );
2238                 $num_parts = 2;
2239             }
2240
2241         } else {
2242             // Not strict mode - simply dump the source into
2243             // the array at index 1 (the first highlightable block)
2244             $parts = array(
2245                 0 => array(
2246                     0 => '',
2247                     1 => ''
2248                 ),
2249                 1 => array(
2250                     0 => null,
2251                     1 => $code
2252                 )
2253             );
2254             $num_parts = 2;
2255         }
2256
2257         //Unset variables we won't need any longer
2258         unset($code);
2259
2260         //Preload some repeatedly used values regarding hardquotes ...
2261         $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2262         $hq_strlen = strlen($hq);
2263
2264         //Preload if line numbers are to be generated afterwards
2265         //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2266         $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2267             !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2268
2269         //preload the escape char for faster checking ...
2270         $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2271
2272         // this is used for single-line comments
2273         $sc_disallowed_before = "";
2274         $sc_disallowed_after = "";
2275
2276         if (isset($this->language_data['PARSER_CONTROL'])) {
2277             if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2278                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2279                     $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2280                 }
2281                 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2282                     $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2283                 }
2284             }
2285         }
2286
2287         //Fix for SF#1932083: Multichar Quotemarks unsupported
2288         $is_string_starter = array();
2289         if ($this->lexic_permissions['STRINGS']) {
2290             foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2291                 if (!isset($is_string_starter[$quotemark[0]])) {
2292                     $is_string_starter[$quotemark[0]] = (string)$quotemark;
2293                 } else if (is_string($is_string_starter[$quotemark[0]])) {
2294                     $is_string_starter[$quotemark[0]] = array(
2295                         $is_string_starter[$quotemark[0]],
2296                         $quotemark);
2297                 } else {
2298                     $is_string_starter[$quotemark[0]][] = $quotemark;
2299                 }
2300             }
2301         }
2302
2303         // Now we go through each part. We know that even-indexed parts are
2304         // code that shouldn't be highlighted, and odd-indexed parts should
2305         // be highlighted
2306         for ($key = 0; $key < $num_parts; ++$key) {
2307             $STRICTATTRS = '';
2308
2309             // Even-indexed parts are not highlighted: emit them escaped and skip ahead
2310             if (!($key & 1)) {
2311                 // Not a block to highlight: append it escaped via hsc()
2312                 $endresult .= $this->hsc($parts[$key][1]);
2313                 unset($parts[$key]);
2314                 continue;
2315             }
2316
2317             $result = '';
2318             $part = $parts[$key][1];
2319
2320             $highlight_part = true;
2321             if ($this->strict_mode && !is_null($parts[$key][0])) {
2322                 // get the class key for this block of code
2323                 $script_key = $parts[$key][0];
2324                 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2325                 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2326                     $this->lexic_permissions['SCRIPT']) {
2327                     // Add a span element around the source to
2328                     // highlight the overall source block
2329                     if (!$this->use_classes &&
2330                         $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2331                         $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2332                     } else {
2333                         $attributes = ' class="sc' . $script_key . '"';
2334                     }
2335                     $result .= "<span$attributes>";
2336                     $STRICTATTRS = $attributes;
2337                 }
2338             }
2339
2340             if ($highlight_part) {
2341                 // Now, highlight the code in this block. This code
2342                 // is really the engine of GeSHi (along with the method
2343                 // parse_non_string_part).
2344
2345                 // cache comment regexps incrementally
2346                 $next_comment_regexp_key = '';
2347                 $next_comment_regexp_pos = -1;
2348                 $next_comment_multi_pos = -1;
2349                 $next_comment_single_pos = -1;
2350                 $comment_regexp_cache_per_key = array();
2351                 $comment_multi_cache_per_key = array();
2352                 $comment_single_cache_per_key = array();
2353                 $next_open_comment_multi = '';
2354                 $next_comment_single_key = '';
2355                 $escape_regexp_cache_per_key = array();
2356                 $next_escape_regexp_key = '';
2357                 $next_escape_regexp_pos = -1;
2358
2359                 $length = strlen($part);
2360                 for ($i = 0; $i < $length; ++$i) {
2361                     // Get the next char
2362                     $char = $part[$i];
2363                     $char_len = 1;
2364
2365                     // update regexp comment cache if needed
2366                     if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2367                         $next_comment_regexp_pos = $length;
2368                         foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2369                             $match_i = false;
2370                             if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2371                                 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2372                                  $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2373                                 // we have already matched something
2374                                 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2375                                     // this comment is never matched
2376                                     continue;
2377                                 }
2378                                 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2379                             } else if (
2380                                 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2381                                 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2382                                 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2383                                 ) {
2384                                 $match_i = $match[0][1];
2385                                 if (GESHI_PHP_PRE_433) {
2386                                     $match_i += $i;
2387                                 }
2388
2389                                 $comment_regexp_cache_per_key[$comment_key] = array(
2390                                     'key' => $comment_key,
2391                                     'length' => strlen($match[0][0]),
2392                                     'pos' => $match_i
2393                                 );
2394                             } else {
2395                                 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2396                                 continue;
2397                             }
2398
2399                             if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2400                                 $next_comment_regexp_pos = $match_i;
2401                                 $next_comment_regexp_key = $comment_key;
2402                                 if ($match_i === $i) {
2403                                     break;
2404                                 }
2405                             }
2406                         }
2407                     }
2408
2409                     $string_started = false;
2410
2411                     if (isset($is_string_starter[$char])) {
2412                         // Possibly the start of a new string ...
2413
2414                         //Check which starter it was ...
2415                         //Fix for SF#1932083: Multichar Quotemarks unsupported
2416                         if (is_array($is_string_starter[$char])) {
2417                             $char_new = '';
2418                             foreach ($is_string_starter[$char] as $testchar) {
2419                                 if ($testchar === substr($part, $i, strlen($testchar)) &&
2420                                     strlen($testchar) > strlen($char_new)) {
2421                                     $char_new = $testchar;
2422                                     $string_started = true;
2423                                 }
2424                             }
2425                             if ($string_started) {
2426                                 $char = $char_new;
2427                             }
2428                         } else {
2429                             $testchar = $is_string_starter[$char];
2430                             if ($testchar === substr($part, $i, strlen($testchar))) {
2431                                 $char = $testchar;
2432                                 $string_started = true;
2433                             }
2434                         }
2435                         $char_len = strlen($char);
2436                     }
2437
2438                     if ($string_started && ($i != $next_comment_regexp_pos)) {
2439                         // Hand out the correct style information for this string
2440                         $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2441                         if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2442                             !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2443                             $string_key = 0;
2444                         }
2445
2446                         // parse the stuff before this
2447                         $result .= $this->parse_non_string_part($stuff_to_parse);
2448                         $stuff_to_parse = '';
2449
2450                         if (!$this->use_classes) {
2451                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2452                         } else {
2453                             $string_attributes = ' class="st'.$string_key.'"';
2454                         }
2455
2456                         // now handle the string
2457                         $string = "<span$string_attributes>" . GeSHi::hsc($char);
2458                         $start = $i + $char_len;
2459                         $string_open = true;
2460
2461                         if(empty($this->language_data['ESCAPE_REGEXP'])) {
2462                             $next_escape_regexp_pos = $length;
2463                         }
2464
2465                         do {
2466                             //Get the regular ending pos ...
2467                             $close_pos = strpos($part, $char, $start);
2468                             if(false === $close_pos) {
2469                                 $close_pos = $length;
2470                             }
2471
2472                             if($this->lexic_permissions['ESCAPE_CHAR']) {
2473                                 // update escape regexp cache if needed
2474                                 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2475                                     $next_escape_regexp_pos = $length;
2476                                     foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2477                                         $match_i = false;
2478                                         if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2479                                             ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2480                                              $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2481                                             // we have already matched something
2482                                             if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2483                                                 // this escape regexp is never matched
2484                                                 continue;
2485                                             }
2486                                             $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2487                                         } else if (
2488                                             //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2489                                             (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2490                                             (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2491                                             ) {
2492                                             $match_i = $match[0][1];
2493                                             if (GESHI_PHP_PRE_433) {
2494                                                 $match_i += $start;
2495                                             }
2496
2497                                             $escape_regexp_cache_per_key[$escape_key] = array(
2498                                                 'key' => $escape_key,
2499                                                 'length' => strlen($match[0][0]),
2500                                                 'pos' => $match_i
2501                                             );
2502                                         } else {
2503                                             $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2504                                             continue;
2505                                         }
2506
2507                                         if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2508                                             $next_escape_regexp_pos = $match_i;
2509                                             $next_escape_regexp_key = $escape_key;
2510                                             if ($match_i === $start) {
2511                                                 break;
2512                                             }
2513                                         }
2514                                     }
2515                                 }
2516
2517                                 //Find the next simple escape position
2518                                 if('' != $this->language_data['ESCAPE_CHAR']) {
2519                                     $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2520                                     if(false === $simple_escape) {
2521                                         $simple_escape = $length;
2522                                     }
2523                                 } else {
2524                                     $simple_escape = $length;
2525                                 }
2526                             } else {
2527                                 $next_escape_regexp_pos = $length;
2528                                 $simple_escape = $length;
2529                             }
2530
2531                             if($simple_escape < $next_escape_regexp_pos &&
2532                                 $simple_escape < $length &&
2533                                 $simple_escape < $close_pos) {
2534                                 //The next escape sequence is a simple one ...
2535                                 $es_pos = $simple_escape;
2536
2537                                 //Add the stuff not in the string yet ...
2538                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2539
2540                                 //Get the style for this escaped char ...
2541                                 if (!$this->use_classes) {
2542                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2543                                 } else {
2544                                     $escape_char_attributes = ' class="es0"';
2545                                 }
2546
2547                                 //Add the style for the escape char ...
2548                                 $string .= "<span$escape_char_attributes>" .
2549                                     GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2550
2551                                 //Get the byte AFTER the ESCAPE_CHAR we just found
2552                                 $es_char = $part[$es_pos + 1];
2553                                 if ($es_char == "\n") {
2554                                     // close the span before the newline so it doesn't wrap the line break
2555                                     $string .= "</span>\n";
2556                                     $start = $es_pos + 2;
2557                                 } else if (ord($es_char) >= 128) {
2558                                     //This is a non-ASCII char (UTF8 or single byte)
2559                                     //This code tries to work around SF#2037598 ...
2560                                     if(function_exists('mb_substr')) {
2561                                         $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2562                                         $string .= $es_char_m . '</span>';
2563                                     } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2564                                         if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2565                                             "|\xE0[\xA0-\xBF][\x80-\xBF]".
2566                                             "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2567                                             "|\xED[\x80-\x9F][\x80-\xBF]".
2568                                             "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2569                                             "|[\xF1-\xF3][\x80-\xBF]{3}".
2570                                             "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2571                                             $part, $es_char_m, null, $es_pos + 1)) {
2572                                             $es_char_m = $es_char_m[0];
2573                                         } else {
2574                                             $es_char_m = $es_char;
2575                                         }
2576                                         $string .= $this->hsc($es_char_m) . '</span>';
2577                                     } else {
2578                                         $es_char_m = $this->hsc($es_char);
2579                                     }
2580                                     $start = $es_pos + strlen($es_char_m) + 1;
2581                                 } else {
2582                                     $string .= $this->hsc($es_char) . '</span>';
2583                                     $start = $es_pos + 2;
2584                                 }
2585                             } else if ($next_escape_regexp_pos < $length &&
2586                                 $next_escape_regexp_pos < $close_pos) {
2587                                 $es_pos = $next_escape_regexp_pos;
2588                                 //Add the stuff not in the string yet ...
2589                                 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2590
2591                                 //Get the key and length of this match ...
2592                                 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2593                                 $escape_str = substr($part, $es_pos, $escape['length']);
2594                                 $escape_key = $escape['key'];
2595
2596                                 //Get the style for this escaped char ...
2597                                 if (!$this->use_classes) {
2598                                     $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2599                                 } else {
2600                                     $escape_char_attributes = ' class="es' . $escape_key . '"';
2601                                 }
2602
2603                                 //Add the style for the escape char ...
2604                                 $string .= "<span$escape_char_attributes>" .
2605                                     $this->hsc($escape_str) . '</span>';
2606
2607                                 $start = $es_pos + $escape['length'];
2608                             } else {
2609                                 //Copy the remainder of the string ...
2610                                 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2611                                 $start = $close_pos + $char_len;
2612                                 $string_open = false;
2613                             }
2614                         } while($string_open);
2615
2616                         if ($check_linenumbers) {
2617                             // Are line numbers used? If so, we should end the string before
2618                             // the newline and begin it again (so when <li>s are put in the source
2619                             // remains XHTML compliant)
2620                             // note to self: This opens up possibility of config files specifying
2621                             // that languages can/cannot have multiline strings???
2622                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2623                         }
2624
2625                         $result .= $string;
2626                         $string = '';
2627                         $i = $start - 1;
2628                         continue;
2629                     } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2630                         substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2631                         // The start of a hard quoted string
2632                         if (!$this->use_classes) {
2633                             $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2634                             $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2635                         } else {
2636                             $string_attributes = ' class="st_h"';
2637                             $escape_char_attributes = ' class="es_h"';
2638                         }
2639                         // parse the stuff before this
2640                         $result .= $this->parse_non_string_part($stuff_to_parse);
2641                         $stuff_to_parse = '';
2642
2643                         // now handle the string
2644                         $string = '';
2645
2646                         // look for closing quote
2647                         $start = $i + $hq_strlen;
2648                         while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2649                             $start = $close_pos + 1;
2650                             if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR']) {
2651                                 // make sure this quote is not escaped
2652                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2653                                     if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2654                                         // check whether this quote is escaped or if it is something like '\\'
2655                                         $escape_char_pos = $close_pos - 1;
2656                                         while ($escape_char_pos > 0
2657                                                 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2658                                             --$escape_char_pos;
2659                                         }
2660                                         if (($close_pos - $escape_char_pos) & 1) {
2661                                             // uneven number of escape chars => this quote is escaped
2662                                             continue 2;
2663                                         }
2664                                     }
2665                                 }
2666                             }
2667
2668                             // found closing quote
2669                             break;
2670                         }
2671
2672                         //Found the closing delimiter?
2673                         if (!$close_pos) {
2674                             // span till the end of this $part when no closing delimiter is found
2675                             $close_pos = $length;
2676                         }
2677
2678                         //Get the actual string
2679                         $string = substr($part, $i, $close_pos - $i + 1);
2680                         $i = $close_pos;
2681
2682                         // handle escape chars and encode html chars
2683                         // (special because when we have escape chars within our string they may not be escaped)
2684                         if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2685                             $start = 0;
2686                             $new_string = '';
2687                             while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2688                                 // html escape stuff before
2689                                 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2690                                 // check if this is a hard escape
2691                                 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2692                                     if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2693                                         // indeed, this is a hardescape
2694                                         $new_string .= "<span$escape_char_attributes>" .
2695                                             $this->hsc($hardescape) . '</span>';
2696                                         $start = $es_pos + strlen($hardescape);
2697                                         continue 2;
2698                                     }
2699                                 }
2700                                 // not a hard escape, but a normal escape
2701                                 // they come in pairs of two
2702                                 $c = 0;
2703                                 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2704                                     && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2705                                     && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2706                                     $c += 2;
2707                                 }
2708                                 if ($c) {
2709                                     $new_string .= "<span$escape_char_attributes>" .
2710                                         str_repeat($escaped_escape_char, $c) .
2711                                         '</span>';
2712                                     $start = $es_pos + $c;
2713                                 } else {
2714                                     // this is just a single lonely escape char...
2715                                     $new_string .= $escaped_escape_char;
2716                                     $start = $es_pos + 1;
2717                                 }
2718                             }
2719                             $string = $new_string . $this->hsc(substr($string, $start));
2720                         } else {
2721                             $string = $this->hsc($string);
2722                         }
2723
2724                         if ($check_linenumbers) {
2725                             // Are line numbers used? If so, we should end the string before
2726                             // the newline and begin it again (so when <li>s are put in the source
2727                             // remains XHTML compliant)
2728                             // note to self: This opens up possibility of config files specifying
2729                             // that languages can/cannot have multiline strings???
2730                             $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2731                         }
2732
2733                         $result .= "<span$string_attributes>" . $string . '</span>';
2734                         $string = '';
2735                         continue;
2736                     } else {
2737                         //Have a look for regexp comments
2738                         if ($i == $next_comment_regexp_pos) {
2739                             $COMMENT_MATCHED = true;
2740                             $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2741                             $test_str = $this->hsc(substr($part, $i, $comment['length']));
2742
2743                             //@todo If remove important do remove here
2744                             if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2745                                 if (!$this->use_classes) {
2746                                     $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2747                                 } else {
2748                                     $attributes = ' class="co' . $comment['key'] . '"';
2749                                 }
2750
2751                                 $test_str = "<span$attributes>" . $test_str . "</span>";
2752
2753                                 // Short-cut through all the multiline code
2754                                 if ($check_linenumbers) {
2755                                     // strreplace to put close span and open span around multiline newlines
2756                                     $test_str = str_replace(
2757                                         "\n", "</span>\n<span$attributes>",
2758                                         str_replace("\n ", "\n&nbsp;", $test_str)
2759                                     );
2760                                 }
2761                             }
2762
2763                             $i += $comment['length'] - 1;
2764
2765                             // parse the rest
2766                             $result .= $this->parse_non_string_part($stuff_to_parse);
2767                             $stuff_to_parse = '';
2768                         }
2769
2770                         // If we haven't matched a regexp comment, try multi-line comments
2771                         if (!$COMMENT_MATCHED) {
2772                             // Is this a multiline comment?
2773                             if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2774                                 $next_comment_multi_pos = $length;
2775                                 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2776                                     $match_i = false;
2777                                     if (isset($comment_multi_cache_per_key[$open]) &&
2778                                         ($comment_multi_cache_per_key[$open] >= $i ||
2779                                          $comment_multi_cache_per_key[$open] === false)) {
2780                                         // we have already matched something
2781                                         if ($comment_multi_cache_per_key[$open] === false) {
2782                                             // this comment is never matched
2783                                             continue;
2784                                         }
2785                                         $match_i = $comment_multi_cache_per_key[$open];
2786                                     } else if (($match_i = stripos($part, $open, $i)) !== false) {
2787                                         $comment_multi_cache_per_key[$open] = $match_i;
2788                                     } else {
2789                                         $comment_multi_cache_per_key[$open] = false;
2790                                         continue;
2791                                     }
2792                                     if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2793                                         $next_comment_multi_pos = $match_i;
2794                                         $next_open_comment_multi = $open;
2795                                         if ($match_i === $i) {
2796                                             break;
2797                                         }
2798                                     }
2799                                 }
2800                             }
2801                             if ($i == $next_comment_multi_pos) {
2802                                 $open = $next_open_comment_multi;
2803                                 $close = $this->language_data['COMMENT_MULTI'][$open];
2804                                 $open_strlen = strlen($open);
2805                                 $close_strlen = strlen($close);
2806                                 $COMMENT_MATCHED = true;
2807                                 $test_str_match = $open;
2808                                 //@todo If remove important do remove here
2809                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2810                                     $open == GESHI_START_IMPORTANT) {
2811                                     if ($open != GESHI_START_IMPORTANT) {
2812                                         if (!$this->use_classes) {
2813                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS']['MULTI'] . '"';
2814                                         } else {
2815                                             $attributes = ' class="coMULTI"';
2816                                         }
2817                                         $test_str = "<span$attributes>" . $this->hsc($open);
2818                                     } else {
2819                                         if (!$this->use_classes) {
2820                                             $attributes = ' style="' . $this->important_styles . '"';
2821                                         } else {
2822                                             $attributes = ' class="imp"';
2823                                         }
2824
2825                                         // We don't include the start of the comment if it's an
2826                                         // "important" part
2827                                         $test_str = "<span$attributes>";
2828                                     }
2829                                 } else {
2830                                     $test_str = $this->hsc($open);
2831                                 }
2832
2833                                 $close_pos = strpos( $part, $close, $i + $open_strlen );
2834
2835                                 if ($close_pos === false) {
2836                                     $close_pos = $length;
2837                                 }
2838
2839                                 // Short-cut through all the multiline code
2840                                 $rest_of_comment = $this->hsc(substr($part, $i + $open_strlen, $close_pos - $i - $open_strlen + $close_strlen));
2841                                 if (($this->lexic_permissions['COMMENTS']['MULTI'] ||
2842                                     $test_str_match == GESHI_START_IMPORTANT) &&
2843                                     $check_linenumbers) {
2844
2845                                     // strreplace to put close span and open span around multiline newlines
2846                                     $test_str .= str_replace(
2847                                         "\n", "</span>\n<span$attributes>",
2848                                         str_replace("\n ", "\n&nbsp;", $rest_of_comment)
2849                                     );
2850                                 } else {
2851                                     $test_str .= $rest_of_comment;
2852                                 }
2853
2854                                 if ($this->lexic_permissions['COMMENTS']['MULTI'] ||
2855                                     $test_str_match == GESHI_START_IMPORTANT) {
2856                                     $test_str .= '</span>';
2857                                 }
2858
2859                                 $i = $close_pos + $close_strlen - 1;
2860
2861                                 // parse the rest
2862                                 $result .= $this->parse_non_string_part($stuff_to_parse);
2863                                 $stuff_to_parse = '';
2864                             }
2865                         }
2866
2867                         // If we haven't matched a multiline comment, try single-line comments
2868                         if (!$COMMENT_MATCHED) {
2869                             // cache potential single line comment occurances
2870                             if (!empty($this->language_data['COMMENT_SINGLE']) && $next_comment_single_pos < $i) {
2871                                 $next_comment_single_pos = $length;
2872                                 foreach ($this->language_data['COMMENT_SINGLE'] as $comment_key => $comment_mark) {
2873                                     $match_i = false;
2874                                     if (isset($comment_single_cache_per_key[$comment_key]) &&
2875                                         ($comment_single_cache_per_key[$comment_key] >= $i ||
2876                                          $comment_single_cache_per_key[$comment_key] === false)) {
2877                                         // we have already matched something
2878                                         if ($comment_single_cache_per_key[$comment_key] === false) {
2879                                             // this comment is never matched
2880                                             continue;
2881                                         }
2882                                         $match_i = $comment_single_cache_per_key[$comment_key];
2883                                     } else if (
2884                                         // case sensitive comments
2885                                         ($this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2886                                         ($match_i = stripos($part, $comment_mark, $i)) !== false) ||
2887                                         // non case sensitive
2888                                         (!$this->language_data['CASE_SENSITIVE'][GESHI_COMMENTS] &&
2889                                           (($match_i = strpos($part, $comment_mark, $i)) !== false))) {
2890                                         $comment_single_cache_per_key[$comment_key] = $match_i;
2891                                     } else {
2892                                         $comment_single_cache_per_key[$comment_key] = false;
2893                                         continue;
2894                                     }
2895                                     if ($match_i !== false && $match_i < $next_comment_single_pos) {
2896                                         $next_comment_single_pos = $match_i;
2897                                         $next_comment_single_key = $comment_key;
2898                                         if ($match_i === $i) {
2899                                             break;
2900                                         }
2901                                     }
2902                                 }
2903                             }
2904                             if ($next_comment_single_pos == $i) {
2905                                 $comment_key = $next_comment_single_key;
2906                                 $comment_mark = $this->language_data['COMMENT_SINGLE'][$comment_key];
2907                                 $com_len = strlen($comment_mark);
2908
2909                                 // This check will find special variables like $# in bash
2910                                 // or compiler directives of Delphi beginning {$
2911                                 if ((empty($sc_disallowed_before) || ($i == 0) ||
2912                                     (false === strpos($sc_disallowed_before, $part[$i-1]))) &&
2913                                     (empty($sc_disallowed_after) || ($length <= $i + $com_len) ||
2914                                     (false === strpos($sc_disallowed_after, $part[$i + $com_len]))))
2915                                 {
2916                                     // this is a valid comment
2917                                     $COMMENT_MATCHED = true;
2918                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2919                                         if (!$this->use_classes) {
2920                                             $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment_key] . '"';
2921                                         } else {
2922                                             $attributes = ' class="co' . $comment_key . '"';
2923                                         }
2924                                         $test_str = "<span$attributes>" . $this->hsc($this->change_case($comment_mark));
2925                                     } else {
2926                                         $test_str = $this->hsc($comment_mark);
2927                                     }
2928
2929                                     //Check if this comment is the last in the source
2930                                     $close_pos = strpos($part, "\n", $i);
2931                                     $oops = false;
2932                                     if ($close_pos === false) {
2933                                         $close_pos = $length;
2934                                         $oops = true;
2935                                     }
2936                                     $test_str .= $this->hsc(substr($part, $i + $com_len, $close_pos - $i - $com_len));
2937                                     if ($this->lexic_permissions['COMMENTS'][$comment_key]) {
2938                                         $test_str .= "</span>";
2939                                     }
2940
2941                                     // Take into account that the comment might be the last in the source
2942                                     if (!$oops) {
2943                                       $test_str .= "\n";
2944                                     }
2945
2946                                     $i = $close_pos;
2947
2948                                     // parse the rest
2949                                     $result .= $this->parse_non_string_part($stuff_to_parse);
2950                                     $stuff_to_parse = '';
2951                                 }
2952                             }
2953                         }
2954                     }
2955
2956                     // Where are we adding this char?
2957                     if (!$COMMENT_MATCHED) {
2958                         $stuff_to_parse .= $char;
2959                     } else {
2960                         $result .= $test_str;
2961                         unset($test_str);
2962                         $COMMENT_MATCHED = false;
2963                     }
2964                 }
2965                 // Parse the last bit
2966                 $result .= $this->parse_non_string_part($stuff_to_parse);
2967                 $stuff_to_parse = '';
2968             } else {
2969                 $result .= $this->hsc($part);
2970             }
2971             // Close the <span> that surrounds the block
2972             if ($STRICTATTRS != '') {
2973                 $result = str_replace("\n", "</span>\n<span$STRICTATTRS>", $result);
2974                 $result .= '</span>';
2975             }
2976
2977             $endresult .= $result;
2978             unset($part, $parts[$key], $result);
2979         }
2980
2981         //This fix is related to SF#1923020, but has to be applied regardless of
2982         //actually highlighting symbols.
2983         /** NOTE: memorypeak #3 */
2984         $endresult = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $endresult);
2985
2986 //        // Parse the last stuff (redundant?)
2987 //        $result .= $this->parse_non_string_part($stuff_to_parse);
2988
2989         // Lop off the very first and last spaces
2990 //        $result = substr($result, 1, -1);
2991
2992         // We're finished: stop timing
2993         $this->set_time($start_time, microtime());
2994
2995         $this->finalise($endresult);
2996         return $endresult;
2997     }
2998
2999     /**
3000      * Swaps out spaces and tabs for HTML indentation. Not needed if
3001      * the code is in a pre block...
3002      *
3003      * @param  string The source to indent (reference!)
3004      * @since  1.0.0
3005      * @access private
3006      */
3007     function indent(&$result) {
3008         /// Replace tabs with the correct number of spaces
3009         if (false !== strpos($result, "\t")) {
3010             $lines = explode("\n", $result);
3011             $result = null;//Save memory while we process the lines individually
3012             $tab_width = $this->get_real_tab_width();
3013             $tab_string = '&nbsp;' . str_repeat(' ', $tab_width);
3014
3015             for ($key = 0, $n = count($lines); $key < $n; $key++) {
3016                 $line = $lines[$key];
3017                 if (false === strpos($line, "\t")) {
3018                     continue;
3019                 }
3020
3021                 $pos = 0;
3022                 $length = strlen($line);
3023                 $lines[$key] = ''; // reduce memory
3024
3025                 $IN_TAG = false;
3026                 for ($i = 0; $i < $length; ++$i) {
3027                     $char = $line[$i];
3028                     // Simple engine to work out whether we're in a tag.
3029                     // If we are we modify $pos. This is so we ignore HTML
3030                     // in the line and only workout the tab replacement
3031                     // via the actual content of the string
3032                     // This test could be improved to include strings in the
3033                     // html so that < or > would be allowed in user's styles
3034                     // (e.g. quotes: '<' '>'; or similar)
3035                     if ($IN_TAG) {
3036                         if ('>' == $char) {
3037                             $IN_TAG = false;
3038                         }
3039                         $lines[$key] .= $char;
3040                     } else if ('<' == $char) {
3041                         $IN_TAG = true;
3042                         $lines[$key] .= '<';
3043                     } else if ('&' == $char) {
3044                         $substr = substr($line, $i + 3, 5);
3045                         $posi = strpos($substr, ';');
3046                         if (false === $posi) {
3047                             ++$pos;
3048                         } else {
3049                             $pos -= $posi+2;
3050                         }
3051                         $lines[$key] .= $char;
3052                     } else if ("\t" == $char) {
3053                         $str = '';
3054                         // OPTIMISE - move $strs out. Make an array:
3055                         // $tabs = array(
3056                         //  1 => '&nbsp;',
3057                         //  2 => '&nbsp; ',
3058                         //  3 => '&nbsp; &nbsp;' etc etc
3059                         // to use instead of building a string every time
3060                         $tab_end_width = $tab_width - ($pos % $tab_width); //Moved out of the look as it doesn't change within the loop
3061                         if (($pos & 1) || 1 == $tab_end_width) {
3062                             $str .= substr($tab_string, 6, $tab_end_width);
3063                         } else {
3064                             $str .= substr($tab_string, 0, $tab_end_width+5);
3065                         }
3066                         $lines[$key] .= $str;
3067                         $pos += $tab_end_width;
3068
3069                         if (false === strpos($line, "\t", $i + 1)) {
3070                             $lines[$key] .= substr($line, $i + 1);
3071                             break;
3072                         }
3073                     } else if (0 == $pos && ' ' == $char) {
3074                         $lines[$key] .= '&nbsp;';
3075                         ++$pos;
3076                     } else {
3077                         $lines[$key] .= $char;
3078                         ++$pos;
3079                     }
3080                 }
3081             }
3082             $result = implode("\n", $lines);
3083             unset($lines);//We don't need the lines separated beyond this --- free them!
3084         }
3085         // Other whitespace
3086         // BenBE: Fix to reduce the number of replacements to be done
3087         $result = preg_replace('/^ /m', '&nbsp;', $result);
3088         $result = str_replace('  ', ' &nbsp;', $result);
3089
3090         if ($this->line_numbers == GESHI_NO_LINE_NUMBERS) {
3091             if ($this->line_ending === null) {
3092                 $result = nl2br($result);
3093             } else {
3094                 $result = str_replace("\n", $this->line_ending, $result);
3095             }
3096         }
3097     }
3098
3099     /**
3100      * Changes the case of a keyword for those languages where a change is asked for
3101      *
3102      * @param  string The keyword to change the case of
3103      * @return string The keyword with its case changed
3104      * @since  1.0.0
3105      * @access private
3106      */
3107     function change_case($instr) {
3108         switch ($this->language_data['CASE_KEYWORDS']) {
3109             case GESHI_CAPS_UPPER:
3110                 return strtoupper($instr);
3111             case GESHI_CAPS_LOWER:
3112                 return strtolower($instr);
3113             default:
3114                 return $instr;
3115         }
3116     }
3117
3118     /**
3119      * Handles replacements of keywords to include markup and links if requested
3120      *
3121      * @param  string The keyword to add the Markup to
3122      * @return The HTML for the match found
3123      * @since  1.0.8
3124      * @access private
3125      *
3126      * @todo   Get rid of ender in keyword links
3127      */
3128     function handle_keyword_replace($match) {
3129         $k = $this->_kw_replace_group;
3130         $keyword = $match[0];
3131
3132         $before = '';
3133         $after = '';
3134
3135         if ($this->keyword_links) {
3136             // Keyword links have been ebabled
3137
3138             if (isset($this->language_data['URLS'][$k]) &&
3139                 $this->language_data['URLS'][$k] != '') {
3140                 // There is a base group for this keyword
3141
3142                 // Old system: strtolower
3143                 //$keyword = ( $this->language_data['CASE_SENSITIVE'][$group] ) ? $keyword : strtolower($keyword);
3144                 // New system: get keyword from language file to get correct case
3145                 if (!$this->language_data['CASE_SENSITIVE'][$k] &&
3146                     strpos($this->language_data['URLS'][$k], '{FNAME}') !== false) {
3147                     foreach ($this->language_data['KEYWORDS'][$k] as $word) {
3148                         if (strcasecmp($word, $keyword) == 0) {
3149                             break;
3150                         }
3151                     }
3152                 } else {
3153                     $word = $keyword;
3154                 }
3155
3156                 $before = '<|UR1|"' .
3157                     str_replace(
3158                         array(
3159                             '{FNAME}',
3160                             '{FNAMEL}',
3161                             '{FNAMEU}',
3162                             '.'),
3163                         array(
3164                             str_replace('+', '%20', urlencode($this->hsc($word))),
3165                             str_replace('+', '%20', urlencode($this->hsc(strtolower($word)))),
3166                             str_replace('+', '%20', urlencode($this->hsc(strtoupper($word)))),
3167                             '<DOT>'),
3168                         $this->language_data['URLS'][$k]
3169                     ) . '">';
3170                 $after = '</a>';
3171             }
3172         }
3173
3174         return $before . '<|/'. $k .'/>' . $this->change_case($keyword) . '|>' . $after;
3175     }
3176
3177     /**
3178      * handles regular expressions highlighting-definitions with callback functions
3179      *
3180      * @note this is a callback, don't use it directly
3181      *
3182      * @param array the matches array
3183      * @return The highlighted string
3184      * @since 1.0.8
3185      * @access private
3186      */
3187     function handle_regexps_callback($matches) {
3188         // before: "' style=\"' . call_user_func(\"$func\", '\\1') . '\"\\1|>'",
3189         return  ' style="' . call_user_func($this->language_data['STYLES']['REGEXPS'][$this->_rx_key], $matches[1]) . '"'. $matches[1] . '|>';
3190     }
3191
3192     /**
3193      * handles newlines in REGEXPS matches. Set the _hmr_* vars before calling this
3194      *
3195      * @note this is a callback, don't use it directly
3196      *
3197      * @param array the matches array
3198      * @return string
3199      * @since 1.0.8
3200      * @access private
3201      */
3202     function handle_multiline_regexps($matches) {
3203         $before = $this->_hmr_before;
3204         $after = $this->_hmr_after;
3205         if ($this->_hmr_replace) {
3206             $replace = $this->_hmr_replace;
3207             $search = array();
3208
3209             foreach (array_keys($matches) as $k) {
3210                 $search[] = '\\' . $k;
3211             }
3212
3213             $before = str_replace($search, $matches, $before);
3214             $after = str_replace($search, $matches, $after);
3215             $replace = str_replace($search, $matches, $replace);
3216         } else {
3217             $replace = $matches[0];
3218         }
3219         return $before
3220                     . '<|!REG3XP' . $this->_hmr_key .'!>'
3221                         . str_replace("\n", "|>\n<|!REG3XP" . $this->_hmr_key . '!>', $replace)
3222                     . '|>'
3223               . $after;
3224     }
3225
3226     /**
3227      * Takes a string that has no strings or comments in it, and highlights
3228      * stuff like keywords, numbers and methods.
3229      *
3230      * @param string The string to parse for keyword, numbers etc.
3231      * @since 1.0.0
3232      * @access private
3233      * @todo BUGGY! Why? Why not build string and return?
3234      */
3235     function parse_non_string_part($stuff_to_parse) {
3236         $stuff_to_parse = ' ' . $this->hsc($stuff_to_parse);
3237
3238         // Highlight keywords
3239         $disallowed_before = "(?<![a-zA-Z0-9\$_\|\#;>|^&";
3240         $disallowed_after = "(?![a-zA-Z0-9_\|%\\-&;";
3241         if ($this->lexic_permissions['STRINGS']) {
3242             $quotemarks = preg_quote(implode($this->language_data['QUOTEMARKS']), '/');
3243             $disallowed_before .= $quotemarks;
3244             $disallowed_after .= $quotemarks;
3245         }
3246         $disallowed_before .= "])";
3247         $disallowed_after .= "])";
3248
3249         $parser_control_pergroup = false;
3250         if (isset($this->language_data['PARSER_CONTROL'])) {
3251             if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'])) {
3252                 $x = 0; // check wether per-keyword-group parser_control is enabled
3253                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'])) {
3254                     $disallowed_before = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_BEFORE'];
3255                     ++$x;
3256                 }
3257                 if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'])) {
3258                     $disallowed_after = $this->language_data['PARSER_CONTROL']['KEYWORDS']['DISALLOWED_AFTER'];
3259                     ++$x;
3260                 }
3261                 $parser_control_pergroup = (count($this->language_data['PARSER_CONTROL']['KEYWORDS']) - $x) > 0;
3262             }
3263         }
3264
3265         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3266             if (!isset($this->lexic_permissions['KEYWORDS'][$k]) ||
3267             $this->lexic_permissions['KEYWORDS'][$k]) {
3268
3269                 $case_sensitive = $this->language_data['CASE_SENSITIVE'][$k];
3270                 $modifiers = $case_sensitive ? '' : 'i';
3271
3272                 // NEW in 1.0.8 - per-keyword-group parser control
3273                 $disallowed_before_local = $disallowed_before;
3274                 $disallowed_after_local = $disallowed_after;
3275                 if ($parser_control_pergroup && isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k])) {
3276                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'])) {
3277                         $disallowed_before_local =
3278                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_BEFORE'];
3279                     }
3280
3281                     if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'])) {
3282                         $disallowed_after_local =
3283                             $this->language_data['PARSER_CONTROL']['KEYWORDS'][$k]['DISALLOWED_AFTER'];
3284                     }
3285                 }
3286
3287                 $this->_kw_replace_group = $k;
3288
3289                 //NEW in 1.0.8, the cached regexp list
3290                 // since we don't want PHP / PCRE to crash due to too large patterns we split them into smaller chunks
3291                 for ($set = 0, $set_length = count($this->language_data['CACHED_KEYWORD_LISTS'][$k]); $set <  $set_length; ++$set) {
3292                     $keywordset =& $this->language_data['CACHED_KEYWORD_LISTS'][$k][$set];
3293                     // Might make a more unique string for putting the number in soon
3294                     // Basically, we don't put the styles in yet because then the styles themselves will
3295                     // get highlighted if the language has a CSS keyword in it (like CSS, for example ;))
3296                     $stuff_to_parse = preg_replace_callback(
3297                         "/$disallowed_before_local({$keywordset})(?!\<DOT\>(?:htm|php))$disallowed_after_local/$modifiers",
3298                         array($this, 'handle_keyword_replace'),
3299                         $stuff_to_parse
3300                         );
3301                 }
3302             }
3303         }
3304
3305         // Regular expressions
3306         foreach ($this->language_data['REGEXPS'] as $key => $regexp) {
3307             if ($this->lexic_permissions['REGEXPS'][$key]) {
3308                 if (is_array($regexp)) {
3309                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3310                         // produce valid HTML when we match multiple lines
3311                         $this->_hmr_replace = $regexp[GESHI_REPLACE];
3312                         $this->_hmr_before = $regexp[GESHI_BEFORE];
3313                         $this->_hmr_key = $key;
3314                         $this->_hmr_after = $regexp[GESHI_AFTER];
3315                         $stuff_to_parse = preg_replace_callback(
3316                             "/" . $regexp[GESHI_SEARCH] . "/{$regexp[GESHI_MODIFIERS]}",
3317                             array($this, 'handle_multiline_regexps'),
3318                             $stuff_to_parse);
3319                         $this->_hmr_replace = false;
3320                         $this->_hmr_before = '';
3321                         $this->_hmr_after = '';
3322                     } else {
3323                         $stuff_to_parse = preg_replace(
3324                             '/' . $regexp[GESHI_SEARCH] . '/' . $regexp[GESHI_MODIFIERS],
3325                             $regexp[GESHI_BEFORE] . '<|!REG3XP'. $key .'!>' . $regexp[GESHI_REPLACE] . '|>' . $regexp[GESHI_AFTER],
3326                             $stuff_to_parse);
3327                     }
3328                 } else {
3329                     if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3330                         // produce valid HTML when we match multiple lines
3331                         $this->_hmr_key = $key;
3332                         $stuff_to_parse = preg_replace_callback( "/(" . $regexp . ")/",
3333                                               array($this, 'handle_multiline_regexps'), $stuff_to_parse);
3334                         $this->_hmr_key = '';
3335                     } else {
3336                         $stuff_to_parse = preg_replace( "/(" . $regexp . ")/", "<|!REG3XP$key!>\\1|>", $stuff_to_parse);
3337                     }
3338                 }
3339             }
3340         }
3341
3342         // Highlight numbers. As of 1.0.8 we support different types of numbers
3343         $numbers_found = false;
3344         if ($this->lexic_permissions['NUMBERS'] && preg_match('#\d#', $stuff_to_parse )) {
3345             $numbers_found = true;
3346
3347             //For each of the formats ...
3348             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3349                 //Check if it should be highlighted ...
3350                 $stuff_to_parse = preg_replace($regexp, "<|/NUM!$id/>\\1|>", $stuff_to_parse);
3351             }
3352         }
3353
3354         //
3355         // Now that's all done, replace /[number]/ with the correct styles
3356         //
3357         foreach (array_keys($this->language_data['KEYWORDS']) as $k) {
3358             if (!$this->use_classes) {
3359                 $attributes = ' style="' .
3360                     (isset($this->language_data['STYLES']['KEYWORDS'][$k]) ?
3361                     $this->language_data['STYLES']['KEYWORDS'][$k] : "") . '"';
3362             } else {
3363                 $attributes = ' class="kw' . $k . '"';
3364             }
3365             $stuff_to_parse = str_replace("<|/$k/>", "<|$attributes>", $stuff_to_parse);
3366         }
3367
3368         if ($numbers_found) {
3369             // Put number styles in
3370             foreach($this->language_data['NUMBERS_RXCACHE'] as $id => $regexp) {
3371                 //Commented out for now, as this needs some review ...
3372                 //                if ($numbers_permissions & $id) {
3373                 //Get the appropriate style ...
3374                 //Checking for unset styles is done by the style cache builder ...
3375                 if (!$this->use_classes) {
3376                     $attributes = ' style="' . $this->language_data['STYLES']['NUMBERS'][$id] . '"';
3377                 } else {
3378                     $attributes = ' class="nu'.$id.'"';
3379                 }
3380
3381                 //Set in the correct styles ...
3382                 $stuff_to_parse = str_replace("/NUM!$id/", $attributes, $stuff_to_parse);
3383                 //                }
3384             }
3385         }
3386
3387         // Highlight methods and fields in objects
3388         if ($this->lexic_permissions['METHODS'] && $this->language_data['OOLANG']) {
3389             $oolang_spaces = "[\s]*";
3390             $oolang_before = "";
3391             $oolang_after = "[a-zA-Z][a-zA-Z0-9_]*";
3392             if (isset($this->language_data['PARSER_CONTROL'])) {
3393                 if (isset($this->language_data['PARSER_CONTROL']['OOLANG'])) {
3394                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'])) {
3395                         $oolang_before = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_BEFORE'];
3396                     }
3397                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'])) {
3398                         $oolang_after = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_AFTER'];
3399                     }
3400                     if (isset($this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'])) {
3401                         $oolang_spaces = $this->language_data['PARSER_CONTROL']['OOLANG']['MATCH_SPACES'];
3402                     }
3403                 }
3404             }
3405
3406             foreach ($this->language_data['OBJECT_SPLITTERS'] as $key => $splitter) {
3407                 if (false !== strpos($stuff_to_parse, $splitter)) {
3408                     if (!$this->use_classes) {
3409                         $attributes = ' style="' . $this->language_data['STYLES']['METHODS'][$key] . '"';
3410                     } else {
3411                         $attributes = ' class="me' . $key . '"';
3412                     }
3413                     $stuff_to_parse = preg_replace("/($oolang_before)(" . preg_quote($this->language_data['OBJECT_SPLITTERS'][$key], '/') . ")($oolang_spaces)($oolang_after)/", "\\1\\2\\3<|$attributes>\\4|>", $stuff_to_parse);
3414                 }
3415             }
3416         }
3417
3418         //
3419         // Highlight brackets. Yes, I've tried adding a semi-colon to this list.
3420         // You try it, and see what happens ;)
3421         // TODO: Fix lexic permissions not converting entities if shouldn't
3422         // be highlighting regardless
3423         //
3424         if ($this->lexic_permissions['BRACKETS']) {
3425             $stuff_to_parse = str_replace( $this->language_data['CACHE_BRACKET_MATCH'],
3426                               $this->language_data['CACHE_BRACKET_REPLACE'], $stuff_to_parse );
3427         }
3428
3429
3430         //FIX for symbol highlighting ...
3431         if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
3432             //Get all matches and throw away those witin a block that is already highlighted... (i.e. matched by a regexp)
3433             $n_symbols = preg_match_all("/<\|(?:<DOT>|[^>])+>(?:(?!\|>).*?)\|>|<\/a>|(?:" . $this->language_data['SYMBOL_SEARCH'] . ")+/", $stuff_to_parse, $pot_symbols, PREG_OFFSET_CAPTURE | PREG_SET_ORDER);
3434             $global_offset = 0;
3435             for ($s_id = 0; $s_id < $n_symbols; ++$s_id) {
3436                 $symbol_match = $pot_symbols[$s_id][0][0];
3437                 if (strpos($symbol_match, '<') !== false || strpos($symbol_match, '>') !== false) {
3438                     // already highlighted blocks _must_ include either < or >
3439                     // so if this conditional applies, we have to skip this match
3440                     // BenBE: UNLESS the block contains <SEMI> or <PIPE>
3441                     if(strpos($symbol_match, '<SEMI>') === false &&
3442                         strpos($symbol_match, '<PIPE>') === false) {
3443                         continue;
3444                     }
3445                 }
3446
3447                 // if we reach this point, we have a valid match which needs to be highlighted
3448
3449                 $symbol_length = strlen($symbol_match);
3450                 $symbol_offset = $pot_symbols[$s_id][0][1];
3451                 unset($pot_symbols[$s_id]);
3452                 $symbol_end = $symbol_length + $symbol_offset;
3453                 $symbol_hl = "";
3454
3455                 // if we have multiple styles, we have to handle them properly
3456                 if ($this->language_data['MULTIPLE_SYMBOL_GROUPS']) {
3457                     $old_sym = -1;
3458                     // Split the current stuff to replace into its atomic symbols ...
3459                     preg_match_all("/" . $this->language_data['SYMBOL_SEARCH'] . "/", $symbol_match, $sym_match_syms, PREG_PATTERN_ORDER);
3460                     foreach ($sym_match_syms[0] as $sym_ms) {
3461                         //Check if consequtive symbols belong to the same group to save output ...
3462                         if (isset($this->language_data['SYMBOL_DATA'][$sym_ms])
3463                             && ($this->language_data['SYMBOL_DATA'][$sym_ms] != $old_sym)) {
3464                             if (-1 != $old_sym) {
3465                                 $symbol_hl .= "|>";
3466                             }
3467                             $old_sym = $this->language_data['SYMBOL_DATA'][$sym_ms];
3468                             if (!$this->use_classes) {
3469                                 $symbol_hl .= '<| style="' . $this->language_data['STYLES']['SYMBOLS'][$old_sym] . '">';
3470                             } else {
3471                                 $symbol_hl .= '<| class="sy' . $old_sym . '">';
3472                             }
3473                         }
3474                         $symbol_hl .= $sym_ms;
3475                     }
3476                     unset($sym_match_syms);
3477
3478                     //Close remaining tags and insert the replacement at the right position ...
3479                     //Take caution if symbol_hl is empty to avoid doubled closing spans.
3480                     if (-1 != $old_sym) {
3481                         $symbol_hl .= "|>";
3482                     }
3483                 } else {
3484                     if (!$this->use_classes) {
3485                         $symbol_hl = '<| style="' . $this->language_data['STYLES']['SYMBOLS'][0] . '">';
3486                     } else {
3487                         $symbol_hl = '<| class="sy0">';
3488                     }
3489                     $symbol_hl .= $symbol_match . '|>';
3490                 }
3491
3492                 $stuff_to_parse = substr_replace($stuff_to_parse, $symbol_hl, $symbol_offset + $global_offset, $symbol_length);
3493
3494                 // since we replace old text with something of different size,
3495                 // we'll have to keep track of the differences
3496                 $global_offset += strlen($symbol_hl) - $symbol_length;
3497             }
3498         }
3499         //FIX for symbol highlighting ...
3500
3501         // Add class/style for regexps
3502         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3503             if ($this->lexic_permissions['REGEXPS'][$key]) {
3504                 if (is_callable($this->language_data['STYLES']['REGEXPS'][$key])) {
3505                     $this->_rx_key = $key;
3506                     $stuff_to_parse = preg_replace_callback("/!REG3XP$key!(.*)\|>/U",
3507                         array($this, 'handle_regexps_callback'),
3508                         $stuff_to_parse);
3509                 } else {
3510                     if (!$this->use_classes) {
3511                         $attributes = ' style="' . $this->language_data['STYLES']['REGEXPS'][$key] . '"';
3512                     } else {
3513                         if (is_array($this->language_data['REGEXPS'][$key]) &&
3514                             array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$key])) {
3515                             $attributes = ' class="' .
3516                                 $this->language_data['REGEXPS'][$key][GESHI_CLASS] . '"';
3517                         } else {
3518                            $attributes = ' class="re' . $key . '"';
3519                         }
3520                     }
3521                     $stuff_to_parse = str_replace("!REG3XP$key!", "$attributes", $stuff_to_parse);
3522                 }
3523             }
3524         }
3525
3526         // Replace <DOT> with . for urls
3527         $stuff_to_parse = str_replace('<DOT>', '.', $stuff_to_parse);
3528         // Replace <|UR1| with <a href= for urls also
3529         if (isset($this->link_styles[GESHI_LINK])) {
3530             if ($this->use_classes) {
3531                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3532             } else {
3533                 $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' style="' . $this->link_styles[GESHI_LINK] . '" href=', $stuff_to_parse);
3534             }
3535         } else {
3536             $stuff_to_parse = str_replace('<|UR1|', '<a' . $this->link_target . ' href=', $stuff_to_parse);
3537         }
3538
3539         //
3540         // NOW we add the span thingy ;)
3541         //
3542
3543         $stuff_to_parse = str_replace('<|', '<span', $stuff_to_parse);
3544         $stuff_to_parse = str_replace ( '|>', '</span>', $stuff_to_parse );
3545         return substr($stuff_to_parse, 1);
3546     }
3547
3548     /**
3549      * Sets the time taken to parse the code
3550      *
3551      * @param microtime The time when parsing started
3552      * @param microtime The time when parsing ended
3553      * @since 1.0.2
3554      * @access private
3555      */
3556     function set_time($start_time, $end_time) {
3557         $start = explode(' ', $start_time);
3558         $end = explode(' ', $end_time);
3559         $this->time = $end[0] + $end[1] - $start[0] - $start[1];
3560     }
3561
3562     /**
3563      * Gets the time taken to parse the code
3564      *
3565      * @return double The time taken to parse the code
3566      * @since  1.0.2
3567      */
3568     function get_time() {
3569         return $this->time;
3570     }
3571
3572     /**
3573      * Merges arrays recursively, overwriting values of the first array with values of later arrays
3574      *
3575      * @since 1.0.8
3576      * @access private
3577      */
3578     function merge_arrays() {
3579         $arrays = func_get_args();
3580         $narrays = count($arrays);
3581
3582         // check arguments
3583         // comment out if more performance is necessary (in this case the foreach loop will trigger a warning if the argument is not an array)
3584         for ($i = 0; $i < $narrays; $i ++) {
3585             if (!is_array($arrays[$i])) {
3586                 // also array_merge_recursive returns nothing in this case
3587                 trigger_error('Argument #' . ($i+1) . ' is not an array - trying to merge array with scalar! Returning false!', E_USER_WARNING);
3588                 return false;
3589             }
3590         }
3591
3592         // the first array is in the output set in every case
3593         $ret = $arrays[0];
3594
3595         // merege $ret with the remaining arrays
3596         for ($i = 1; $i < $narrays; $i ++) {
3597             foreach ($arrays[$i] as $key => $value) {
3598                 if (is_array($value) && isset($ret[$key])) {
3599                     // if $ret[$key] is not an array you try to merge an scalar value with an array - the result is not defined (incompatible arrays)
3600                     // in this case the call will trigger an E_USER_WARNING and the $ret[$key] will be false.
3601                     $ret[$key] = $this->merge_arrays($ret[$key], $value);
3602                 } else {
3603                     $ret[$key] = $value;
3604                 }
3605             }
3606         }
3607
3608         return $ret;
3609     }
3610
3611     /**
3612      * Gets language information and stores it for later use
3613      *
3614      * @param string The filename of the language file you want to load
3615      * @since 1.0.0
3616      * @access private
3617      * @todo Needs to load keys for lexic permissions for keywords, regexps etc
3618      */
3619     function load_language($file_name) {
3620         if ($file_name == $this->loaded_language) {
3621             // this file is already loaded!
3622             return;
3623         }
3624
3625         //Prepare some stuff before actually loading the language file
3626         $this->loaded_language = $file_name;
3627         $this->parse_cache_built = false;
3628         $this->enable_highlighting();
3629         $language_data = array();
3630
3631         //Load the language file
3632         require $file_name;
3633
3634         // Perhaps some checking might be added here later to check that
3635         // $language data is a valid thing but maybe not
3636         $this->language_data = $language_data;
3637
3638         // Set strict mode if should be set
3639         $this->strict_mode = $this->language_data['STRICT_MODE_APPLIES'];
3640
3641         // Set permissions for all lexics to true
3642         // so they'll be highlighted by default
3643         foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
3644             if (!empty($this->language_data['KEYWORDS'][$key])) {
3645                 $this->lexic_permissions['KEYWORDS'][$key] = true;
3646             } else {
3647                 $this->lexic_permissions['KEYWORDS'][$key] = false;
3648             }
3649         }
3650
3651         foreach (array_keys($this->language_data['COMMENT_SINGLE']) as $key) {
3652             $this->lexic_permissions['COMMENTS'][$key] = true;
3653         }
3654         foreach (array_keys($this->language_data['REGEXPS']) as $key) {
3655             $this->lexic_permissions['REGEXPS'][$key] = true;
3656         }
3657
3658         // for BenBE and future code reviews:
3659         // we can use empty here since we only check for existance and emptiness of an array
3660         // if it is not an array at all but rather false or null this will work as intended as well
3661         // even if $this->language_data['PARSER_CONTROL'] is undefined this won't trigger a notice
3662         if (!empty($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'])) {
3663             foreach ($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS'] as $flag => $value) {
3664                 // it's either true or false and maybe is true as well
3665                 $perm = $value !== GESHI_NEVER;
3666                 if ($flag == 'ALL') {
3667                     $this->enable_highlighting($perm);
3668                     continue;
3669                 }
3670                 if (!isset($this->lexic_permissions[$flag])) {
3671                     // unknown lexic permission
3672                     continue;
3673                 }
3674                 if (is_array($this->lexic_permissions[$flag])) {
3675                     foreach ($this->lexic_permissions[$flag] as $key => $val) {
3676                         $this->lexic_permissions[$flag][$key] = $perm;
3677                     }
3678                 } else {
3679                     $this->lexic_permissions[$flag] = $perm;
3680                 }
3681             }
3682             unset($this->language_data['PARSER_CONTROL']['ENABLE_FLAGS']);
3683         }
3684
3685         //Fix: Problem where hardescapes weren't handled if no ESCAPE_CHAR was given
3686         //You need to set one for HARDESCAPES only in this case.
3687         if(!isset($this->language_data['HARDCHAR'])) {
3688             $this->language_data['HARDCHAR'] = $this->language_data['ESCAPE_CHAR'];
3689         }
3690
3691         //NEW in 1.0.8: Allow styles to be loaded from a separate file to override defaults
3692         $style_filename = substr($file_name, 0, -4) . '.style.php';
3693         if (is_readable($style_filename)) {
3694             //Clear any style_data that could have been set before ...
3695             if (isset($style_data)) {
3696                 unset($style_data);
3697             }
3698
3699             //Read the Style Information from the style file
3700             include $style_filename;
3701
3702             //Apply the new styles to our current language styles
3703             if (isset($style_data) && is_array($style_data)) {
3704                 $this->language_data['STYLES'] =
3705                     $this->merge_arrays($this->language_data['STYLES'], $style_data);
3706             }
3707         }
3708     }
3709
3710     /**
3711      * Takes the parsed code and various options, and creates the HTML
3712      * surrounding it to make it look nice.
3713      *
3714      * @param  string The code already parsed (reference!)
3715      * @since  1.0.0
3716      * @access private
3717      */
3718     function finalise(&$parsed_code) {
3719         // Remove end parts of important declarations
3720         // This is BUGGY!! My fault for bad code: fix coming in 1.2
3721         // @todo Remove this crap
3722         if ($this->enable_important_blocks &&
3723             (strpos($parsed_code, $this->hsc(GESHI_START_IMPORTANT)) === false)) {
3724             $parsed_code = str_replace($this->hsc(GESHI_END_IMPORTANT), '', $parsed_code);
3725         }
3726
3727         // Add HTML whitespace stuff if we're using the <div> header
3728         if ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) {
3729             $this->indent($parsed_code);
3730         }
3731
3732         // purge some unnecessary stuff
3733         /** NOTE: memorypeak #1 */
3734         $parsed_code = preg_replace('#<span[^>]+>(\s*)</span>#', '\\1', $parsed_code);
3735
3736         // If we are using IDs for line numbers, there needs to be an overall
3737         // ID set to prevent collisions.
3738         if ($this->add_ids && !$this->overall_id) {
3739             $this->overall_id = 'geshi-' . substr(md5(microtime()), 0, 4);
3740         }
3741
3742         // Get code into lines
3743         /** NOTE: memorypeak #2 */
3744         $code = explode("\n", $parsed_code);
3745         $parsed_code = $this->header();
3746
3747         // If we're using line numbers, we insert <li>s and appropriate
3748         // markup to style them (otherwise we don't need to do anything)
3749         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS && $this->header_type != GESHI_HEADER_PRE_TABLE) {
3750             // If we're using the <pre> header, we shouldn't add newlines because
3751             // the <pre> will line-break them (and the <li>s already do this for us)
3752             $ls = ($this->header_type != GESHI_HEADER_PRE && $this->header_type != GESHI_HEADER_PRE_VALID) ? "\n" : '';
3753
3754             // Set vars to defaults for following loop
3755             $i = 0;
3756
3757             // Foreach line...
3758             for ($i = 0, $n = count($code); $i < $n;) {
3759                 //Reset the attributes for a new line ...
3760                 $attrs = array();
3761
3762                 // Make lines have at least one space in them if they're empty
3763                 // BenBE: Checking emptiness using trim instead of relying on blanks
3764                 if ('' == trim($code[$i])) {
3765                     $code[$i] = '&nbsp;';
3766                 }
3767
3768                 // If this is a "special line"...
3769                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3770                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3771                     // Set the attributes to style the line
3772                     if ($this->use_classes) {
3773                         //$attr = ' class="li2"';
3774                         $attrs['class'][] = 'li2';
3775                         $def_attr = ' class="de2"';
3776                     } else {
3777                         //$attr = ' style="' . $this->line_style2 . '"';
3778                         $attrs['style'][] = $this->line_style2;
3779                         // This style "covers up" the special styles set for special lines
3780                         // so that styles applied to special lines don't apply to the actual
3781                         // code on that line
3782                         $def_attr = ' style="' . $this->code_style . '"';
3783                     }
3784                 } else {
3785                     if ($this->use_classes) {
3786                         //$attr = ' class="li1"';
3787                         $attrs['class'][] = 'li1';
3788                         $def_attr = ' class="de1"';
3789                     } else {
3790                         //$attr = ' style="' . $this->line_style1 . '"';
3791                         $attrs['style'][] = $this->line_style1;
3792                         $def_attr = ' style="' . $this->code_style . '"';
3793                     }
3794                 }
3795
3796                 //Check which type of tag to insert for this line
3797                 if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3798                     $start = "<pre$def_attr>";
3799                     $end = '</pre>';
3800                 } else {
3801                     // Span or div?
3802                     $start = "<div$def_attr>";
3803                     $end = '</div>';
3804                 }
3805
3806                 ++$i;
3807
3808                 // Are we supposed to use ids? If so, add them
3809                 if ($this->add_ids) {
3810                     $attrs['id'][] = "$this->overall_id-$i";
3811                 }
3812
3813                 //Is this some line with extra styles???
3814                 if (in_array($i, $this->highlight_extra_lines)) {
3815                     if ($this->use_classes) {
3816                         if (isset($this->highlight_extra_lines_styles[$i])) {
3817                             $attrs['class'][] = "lx$i";
3818                         } else {
3819                             $attrs['class'][] = "ln-xtra";
3820                         }
3821                     } else {
3822                         array_push($attrs['style'], $this->get_line_style($i));
3823                     }
3824                 }
3825
3826                 // Add in the line surrounded by appropriate list HTML
3827                 $attr_string = '';
3828                 foreach ($attrs as $key => $attr) {
3829                     $attr_string .= ' ' . $key . '="' . implode(' ', $attr) . '"';
3830                 }
3831
3832                 $parsed_code .= "<li$attr_string>$start{$code[$i-1]}$end</li>$ls";
3833                 unset($code[$i - 1]);
3834             }
3835         } else {
3836             $n = count($code);
3837             if ($this->use_classes) {
3838                 $attributes = ' class="de1"';
3839             } else {
3840                 $attributes = ' style="'. $this->code_style .'"';
3841             }
3842             if ($this->header_type == GESHI_HEADER_PRE_VALID) {
3843                 $parsed_code .= '<pre'. $attributes .'>';
3844             } elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
3845                 if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3846                     if ($this->use_classes) {
3847                         $attrs = ' class="ln"';
3848                     } else {
3849                         $attrs = ' style="'. $this->table_linenumber_style .'"';
3850                     }
3851                     $parsed_code .= '<td'.$attrs.'><pre'.$attributes.'>';
3852                     // get linenumbers
3853                     // we don't merge it with the for below, since it should be better for
3854                     // memory consumption this way
3855                     // @todo: but... actually it would still be somewhat nice to merge the two loops
3856                     //        the mem peaks are at different positions
3857                     for ($i = 0; $i < $n; ++$i) {
3858                         $close = 0;
3859                         // fancy lines
3860                         if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3861                             $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3862                             // Set the attributes to style the line
3863                             if ($this->use_classes) {
3864                                 $parsed_code .= '<span class="xtra li2"><span class="de2">';
3865                             } else {
3866                                 // This style "covers up" the special styles set for special lines
3867                                 // so that styles applied to special lines don't apply to the actual
3868                                 // code on that line
3869                                 $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3870                                                   .'<span style="' . $this->code_style .'">';
3871                             }
3872                             $close += 2;
3873                         }
3874                         //Is this some line with extra styles???
3875                         if (in_array($i + 1, $this->highlight_extra_lines)) {
3876                             if ($this->use_classes) {
3877                                 if (isset($this->highlight_extra_lines_styles[$i])) {
3878                                     $parsed_code .= "<span class=\"xtra lx$i\">";
3879                                 } else {
3880                                     $parsed_code .= "<span class=\"xtra ln-xtra\">";
3881                                 }
3882                             } else {
3883                                 $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3884                             }
3885                             ++$close;
3886                         }
3887                         $parsed_code .= $this->line_numbers_start + $i;
3888                         if ($close) {
3889                             $parsed_code .= str_repeat('</span>', $close);
3890                         } else if ($i != $n) {
3891                             $parsed_code .= "\n";
3892                         }
3893                     }
3894                     $parsed_code .= '</pre></td><td'.$attributes.'>';
3895                 }
3896                 $parsed_code .= '<pre'. $attributes .'>';
3897             }
3898             // No line numbers, but still need to handle highlighting lines extra.
3899             // Have to use divs so the full width of the code is highlighted
3900             $close = 0;
3901             for ($i = 0; $i < $n; ++$i) {
3902                 // Make lines have at least one space in them if they're empty
3903                 // BenBE: Checking emptiness using trim instead of relying on blanks
3904                 if ('' == trim($code[$i])) {
3905                     $code[$i] = '&nbsp;';
3906                 }
3907                 // fancy lines
3908                 if ($this->line_numbers == GESHI_FANCY_LINE_NUMBERS &&
3909                     $i % $this->line_nth_row == ($this->line_nth_row - 1)) {
3910                     // Set the attributes to style the line
3911                     if ($this->use_classes) {
3912                         $parsed_code .= '<span class="xtra li2"><span class="de2">';
3913                     } else {
3914                         // This style "covers up" the special styles set for special lines
3915                         // so that styles applied to special lines don't apply to the actual
3916                         // code on that line
3917                         $parsed_code .= '<span style="display:block;' . $this->line_style2 . '">'
3918                                           .'<span style="' . $this->code_style .'">';
3919                     }
3920                     $close += 2;
3921                 }
3922                 //Is this some line with extra styles???
3923                 if (in_array($i + 1, $this->highlight_extra_lines)) {
3924                     if ($this->use_classes) {
3925                         if (isset($this->highlight_extra_lines_styles[$i])) {
3926                             $parsed_code .= "<span class=\"xtra lx$i\">";
3927                         } else {
3928                             $parsed_code .= "<span class=\"xtra ln-xtra\">";
3929                         }
3930                     } else {
3931                         $parsed_code .= "<span style=\"display:block;" . $this->get_line_style($i) . "\">";
3932                     }
3933                     ++$close;
3934                 }
3935
3936                 $parsed_code .= $code[$i];
3937
3938                 if ($close) {
3939                   $parsed_code .= str_repeat('</span>', $close);
3940                   $close = 0;
3941                 }
3942                 elseif ($i + 1 < $n) {
3943                     $parsed_code .= "\n";
3944                 }
3945                 unset($code[$i]);
3946             }
3947
3948             if ($this->header_type == GESHI_HEADER_PRE_VALID || $this->header_type == GESHI_HEADER_PRE_TABLE) {
3949                 $parsed_code .= '</pre>';
3950             }
3951             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
3952                 $parsed_code .= '</td>';
3953             }
3954         }
3955
3956         $parsed_code .= $this->footer();
3957     }
3958
3959     /**
3960      * Creates the header for the code block (with correct attributes)
3961      *
3962      * @return string The header for the code block
3963      * @since  1.0.0
3964      * @access private
3965      */
3966     function header() {
3967         // Get attributes needed
3968         /**
3969          * @todo   Document behaviour change - class is outputted regardless of whether
3970          *         we're using classes or not. Same with style
3971          */
3972         $attributes = ' class="' . $this->language;
3973         if ($this->overall_class != '') {
3974             $attributes .= " ".$this->overall_class;
3975         }
3976         $attributes .= '"';
3977
3978         if ($this->overall_id != '') {
3979             $attributes .= " id=\"{$this->overall_id}\"";
3980         }
3981         if ($this->overall_style != '') {
3982             $attributes .= ' style="' . $this->overall_style . '"';
3983         }
3984
3985         $ol_attributes = '';
3986
3987         if ($this->line_numbers_start != 1) {
3988             $ol_attributes .= ' start="' . $this->line_numbers_start . '"';
3989         }
3990
3991         // Get the header HTML
3992         $header = $this->header_content;
3993         if ($header) {
3994             if ($this->header_type == GESHI_HEADER_PRE || $this->header_type == GESHI_HEADER_PRE_VALID) {
3995                 $header = str_replace("\n", '', $header);
3996             }
3997             $header = $this->replace_keywords($header);
3998
3999             if ($this->use_classes) {
4000                 $attr = ' class="head"';
4001             } else {
4002                 $attr = " style=\"{$this->header_content_style}\"";
4003             }
4004             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4005                 $header = "<thead><tr><td colspan=\"2\" $attr>$header</td></tr></thead>";
4006             } else {
4007                 $header = "<div$attr>$header</div>";
4008             }
4009         }
4010
4011         if (GESHI_HEADER_NONE == $this->header_type) {
4012             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4013                 return "$header<ol$attributes$ol_attributes>";
4014             }
4015             return $header . ($this->force_code_block ? '<div>' : '');
4016         }
4017
4018         // Work out what to return and do it
4019         if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4020             if ($this->header_type == GESHI_HEADER_PRE) {
4021                 return "<pre$attributes>$header<ol$ol_attributes>";
4022             } else if ($this->header_type == GESHI_HEADER_DIV ||
4023                 $this->header_type == GESHI_HEADER_PRE_VALID) {
4024                 return "<div$attributes>$header<ol$ol_attributes>";
4025             } else if ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4026                 return "<table$attributes>$header<tbody><tr class=\"li1\">";
4027             }
4028         } else {
4029             if ($this->header_type == GESHI_HEADER_PRE) {
4030                 return "<pre$attributes>$header"  .
4031                     ($this->force_code_block ? '<div>' : '');
4032             } else {
4033                 return "<div$attributes>$header" .
4034                     ($this->force_code_block ? '<div>' : '');
4035             }
4036         }
4037     }
4038
4039     /**
4040      * Returns the footer for the code block.
4041      *
4042      * @return string The footer for the code block
4043      * @since  1.0.0
4044      * @access private
4045      */
4046     function footer() {
4047         $footer = $this->footer_content;
4048         if ($footer) {
4049             if ($this->header_type == GESHI_HEADER_PRE) {
4050                 $footer = str_replace("\n", '', $footer);;
4051             }
4052             $footer = $this->replace_keywords($footer);
4053
4054             if ($this->use_classes) {
4055                 $attr = ' class="foot"';
4056             } else {
4057                 $attr = " style=\"{$this->footer_content_style}\"";
4058             }
4059             if ($this->header_type == GESHI_HEADER_PRE_TABLE && $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4060                 $footer = "<tfoot><tr><td colspan=\"2\">$footer</td></tr></tfoot>";
4061             } else {
4062                 $footer = "<div$attr>$footer</div>";
4063             }
4064         }
4065
4066         if (GESHI_HEADER_NONE == $this->header_type) {
4067             return ($this->line_numbers != GESHI_NO_LINE_NUMBERS) ? '</ol>' . $footer : $footer;
4068         }
4069
4070         if ($this->header_type == GESHI_HEADER_DIV || $this->header_type == GESHI_HEADER_PRE_VALID) {
4071             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4072                 return "</ol>$footer</div>";
4073             }
4074             return ($this->force_code_block ? '</div>' : '') .
4075                 "$footer</div>";
4076         }
4077         elseif ($this->header_type == GESHI_HEADER_PRE_TABLE) {
4078             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4079                 return "</tr></tbody>$footer</table>";
4080             }
4081             return ($this->force_code_block ? '</div>' : '') .
4082                 "$footer</div>";
4083         }
4084         else {
4085             if ($this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4086                 return "</ol>$footer</pre>";
4087             }
4088             return ($this->force_code_block ? '</div>' : '') .
4089                 "$footer</pre>";
4090         }
4091     }
4092
4093     /**
4094      * Replaces certain keywords in the header and footer with
4095      * certain configuration values
4096      *
4097      * @param  string The header or footer content to do replacement on
4098      * @return string The header or footer with replaced keywords
4099      * @since  1.0.2
4100      * @access private
4101      */
4102     function replace_keywords($instr) {
4103         $keywords = $replacements = array();
4104
4105         $keywords[] = '<TIME>';
4106         $keywords[] = '{TIME}';
4107         $replacements[] = $replacements[] = number_format($time = $this->get_time(), 3);
4108
4109         $keywords[] = '<LANGUAGE>';
4110         $keywords[] = '{LANGUAGE}';
4111         $replacements[] = $replacements[] = $this->language_data['LANG_NAME'];
4112
4113         $keywords[] = '<VERSION>';
4114         $keywords[] = '{VERSION}';
4115         $replacements[] = $replacements[] = GESHI_VERSION;
4116
4117         $keywords[] = '<SPEED>';
4118         $keywords[] = '{SPEED}';
4119         if ($time <= 0) {
4120             $speed = 'N/A';
4121         } else {
4122             $speed = strlen($this->source) / $time;
4123             if ($speed >= 1024) {
4124                 $speed = sprintf("%.2f KB/s", $speed / 1024.0);
4125             } else {
4126                 $speed = sprintf("%.0f B/s", $speed);
4127             }
4128         }
4129         $replacements[] = $replacements[] = $speed;
4130
4131         return str_replace($keywords, $replacements, $instr);
4132     }
4133
4134     /**
4135      * Secure replacement for PHP built-in function htmlspecialchars().
4136      *
4137      * See ticket #427 (http://wush.net/trac/wikka/ticket/427) for the rationale
4138      * for this replacement function.
4139      *
4140      * The INTERFACE for this function is almost the same as that for
4141      * htmlspecialchars(), with the same default for quote style; however, there
4142      * is no 'charset' parameter. The reason for this is as follows:
4143      *
4144      * The PHP docs say:
4145      *      "The third argument charset defines character set used in conversion."
4146      *
4147      * I suspect PHP's htmlspecialchars() is working at the byte-value level and
4148      * thus _needs_ to know (or asssume) a character set because the special
4149      * characters to be replaced could exist at different code points in
4150      * different character sets. (If indeed htmlspecialchars() works at
4151      * byte-value level that goes some  way towards explaining why the
4152      * vulnerability would exist in this function, too, and not only in
4153      * htmlentities() which certainly is working at byte-value level.)
4154      *
4155      * This replacement function however works at character level and should
4156      * therefore be "immune" to character set differences - so no charset
4157      * parameter is needed or provided. If a third parameter is passed, it will
4158      * be silently ignored.
4159      *
4160      * In the OUTPUT there is a minor difference in that we use '&#39;' instead
4161      * of PHP's '&#039;' for a single quote: this provides compatibility with
4162      *      get_html_translation_table(HTML_SPECIALCHARS, ENT_QUOTES)
4163      * (see comment by mikiwoz at yahoo dot co dot uk on
4164      * http://php.net/htmlspecialchars); it also matches the entity definition
4165      * for XML 1.0
4166      * (http://www.w3.org/TR/xhtml1/dtds.html#a_dtd_Special_characters).
4167      * Like PHP we use a numeric character reference instead of '&apos;' for the
4168      * single quote. For the other special characters we use the named entity
4169      * references, as PHP is doing.
4170      *
4171      * @author      {@link http://wikkawiki.org/JavaWoman Marjolein Katsma}
4172      *
4173      * @license     http://www.gnu.org/copyleft/lgpl.html
4174      *              GNU Lesser General Public License
4175      * @copyright   Copyright 2007, {@link http://wikkawiki.org/CreditsPage
4176      *              Wikka Development Team}
4177      *
4178      * @access      private
4179      * @param       string  $string string to be converted
4180      * @param       integer $quote_style
4181      *                      - ENT_COMPAT:   escapes &, <, > and double quote (default)
4182      *                      - ENT_NOQUOTES: escapes only &, < and >
4183      *                      - ENT_QUOTES:   escapes &, <, >, double and single quotes
4184      * @return      string  converted string
4185      * @since       1.0.7.18
4186      */
4187     function hsc($string, $quote_style = ENT_COMPAT) {
4188         // init
4189         static $aTransSpecchar = array(
4190             '&' => '&amp;',
4191             '"' => '&quot;',
4192             '<' => '&lt;',
4193             '>' => '&gt;',
4194
4195             //This fix is related to SF#1923020, but has to be applied
4196             //regardless of actually highlighting symbols.
4197
4198             //Circumvent a bug with symbol highlighting
4199             //This is required as ; would produce undesirable side-effects if it
4200             //was not to be processed as an entity.
4201             ';' => '<SEMI>', // Force ; to be processed as entity
4202             '|' => '<PIPE>' // Force | to be processed as entity
4203             );                      // ENT_COMPAT set
4204
4205         switch ($quote_style) {
4206             case ENT_NOQUOTES: // don't convert double quotes
4207                 unset($aTransSpecchar['"']);
4208                 break;
4209             case ENT_QUOTES: // convert single quotes as well
4210                 $aTransSpecchar["'"] = '&#39;'; // (apos) htmlspecialchars() uses '&#039;'
4211                 break;
4212         }
4213
4214         // return translated string
4215         return strtr($string, $aTransSpecchar);
4216     }
4217
4218     /**
4219      * Returns a stylesheet for the highlighted code. If $economy mode
4220      * is true, we only return the stylesheet declarations that matter for
4221      * this code block instead of the whole thing
4222      *
4223      * @param  boolean Whether to use economy mode or not
4224      * @return string A stylesheet built on the data for the current language
4225      * @since  1.0.0
4226      */
4227     function get_stylesheet($economy_mode = true) {
4228         // If there's an error, chances are that the language file
4229         // won't have populated the language data file, so we can't
4230         // risk getting a stylesheet...
4231         if ($this->error) {
4232             return '';
4233         }
4234
4235         //Check if the style rearrangements have been processed ...
4236         //This also does some preprocessing to check which style groups are useable ...
4237         if(!isset($this->language_data['NUMBERS_CACHE'])) {
4238             $this->build_style_cache();
4239         }
4240
4241         // First, work out what the selector should be. If there's an ID,
4242         // that should be used, the same for a class. Otherwise, a selector
4243         // of '' means that these styles will be applied anywhere
4244         if ($this->overall_id) {
4245             $selector = '#' . $this->overall_id;
4246         } else {
4247             $selector = '.' . $this->language;
4248             if ($this->overall_class) {
4249                 $selector .= '.' . $this->overall_class;
4250             }
4251         }
4252         $selector .= ' ';
4253
4254         // Header of the stylesheet
4255         if (!$economy_mode) {
4256             $stylesheet = "/**\n".
4257                 " * GeSHi Dynamically Generated Stylesheet\n".
4258                 " * --------------------------------------\n".
4259                 " * Dynamically generated stylesheet for {$this->language}\n".
4260                 " * CSS class: {$this->overall_class}, CSS id: {$this->overall_id}\n".
4261                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4262                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4263                 " * --------------------------------------\n".
4264                 " */\n";
4265         } else {
4266             $stylesheet = "/**\n".
4267                 " * GeSHi (C) 2004 - 2007 Nigel McNie, 2007 - 2008 Benny Baumann\n" .
4268                 " * (http://qbnz.com/highlighter/ and http://geshi.org/)\n".
4269                 " */\n";
4270         }
4271
4272         // Set the <ol> to have no effect at all if there are line numbers
4273         // (<ol>s have margins that should be destroyed so all layout is
4274         // controlled by the set_overall_style method, which works on the
4275         // <pre> or <div> container). Additionally, set default styles for lines
4276         if (!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) {
4277             //$stylesheet .= "$selector, {$selector}ol, {$selector}ol li {margin: 0;}\n";
4278             $stylesheet .= "$selector.de1, $selector.de2 {{$this->code_style}}\n";
4279         }
4280
4281         // Add overall styles
4282         // note: neglect economy_mode, empty styles are meaningless
4283         if ($this->overall_style != '') {
4284             $stylesheet .= "$selector {{$this->overall_style}}\n";
4285         }
4286
4287         // Add styles for links
4288         // note: economy mode does not make _any_ sense here
4289         //       either the style is empty and thus no selector is needed
4290         //       or the appropriate key is given.
4291         foreach ($this->link_styles as $key => $style) {
4292             if ($style != '') {
4293                 switch ($key) {
4294                     case GESHI_LINK:
4295                         $stylesheet .= "{$selector}a:link {{$style}}\n";
4296                         break;
4297                     case GESHI_HOVER:
4298                         $stylesheet .= "{$selector}a:hover {{$style}}\n";
4299                         break;
4300                     case GESHI_ACTIVE:
4301                         $stylesheet .= "{$selector}a:active {{$style}}\n";
4302                         break;
4303                     case GESHI_VISITED:
4304                         $stylesheet .= "{$selector}a:visited {{$style}}\n";
4305                         break;
4306                 }
4307             }
4308         }
4309
4310         // Header and footer
4311         // note: neglect economy_mode, empty styles are meaningless
4312         if ($this->header_content_style != '') {
4313             $stylesheet .= "$selector.head {{$this->header_content_style}}\n";
4314         }
4315         if ($this->footer_content_style != '') {
4316             $stylesheet .= "$selector.foot {{$this->footer_content_style}}\n";
4317         }
4318
4319         // Styles for important stuff
4320         // note: neglect economy_mode, empty styles are meaningless
4321         if ($this->important_styles != '') {
4322             $stylesheet .= "$selector.imp {{$this->important_styles}}\n";
4323         }
4324
4325         // Simple line number styles
4326         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->line_style1 != '') {
4327             $stylesheet .= "{$selector}li, {$selector}.li1 {{$this->line_style1}}\n";
4328         }
4329         if ((!$economy_mode || $this->line_numbers != GESHI_NO_LINE_NUMBERS) && $this->table_linenumber_style != '') {
4330             $stylesheet .= "{$selector}.ln {{$this->table_linenumber_style}}\n";
4331         }
4332         // If there is a style set for fancy line numbers, echo it out
4333         if ((!$economy_mode || $this->line_numbers == GESHI_FANCY_LINE_NUMBERS) && $this->line_style2 != '') {
4334             $stylesheet .= "{$selector}.li2 {{$this->line_style2}}\n";
4335         }
4336
4337         // note: empty styles are meaningless
4338         foreach ($this->language_data['STYLES']['KEYWORDS'] as $group => $styles) {
4339             if ($styles != '' && (!$economy_mode ||
4340                 (isset($this->lexic_permissions['KEYWORDS'][$group]) &&
4341                 $this->lexic_permissions['KEYWORDS'][$group]))) {
4342                 $stylesheet .= "$selector.kw$group {{$styles}}\n";
4343             }
4344         }
4345         foreach ($this->language_data['STYLES']['COMMENTS'] as $group => $styles) {
4346             if ($styles != '' && (!$economy_mode ||
4347                 (isset($this->lexic_permissions['COMMENTS'][$group]) &&
4348                 $this->lexic_permissions['COMMENTS'][$group]) ||
4349                 (!empty($this->language_data['COMMENT_REGEXP']) &&
4350                 !empty($this->language_data['COMMENT_REGEXP'][$group])))) {
4351                 $stylesheet .= "$selector.co$group {{$styles}}\n";
4352             }
4353         }
4354         foreach ($this->language_data['STYLES']['ESCAPE_CHAR'] as $group => $styles) {
4355             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['ESCAPE_CHAR'])) {
4356                 // NEW: since 1.0.8 we have to handle hardescapes
4357                 if ($group === 'HARD') {
4358                     $group = '_h';
4359                 }
4360                 $stylesheet .= "$selector.es$group {{$styles}}\n";
4361             }
4362         }
4363         foreach ($this->language_data['STYLES']['BRACKETS'] as $group => $styles) {
4364             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['BRACKETS'])) {
4365                 $stylesheet .= "$selector.br$group {{$styles}}\n";
4366             }
4367         }
4368         foreach ($this->language_data['STYLES']['SYMBOLS'] as $group => $styles) {
4369             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['SYMBOLS'])) {
4370                 $stylesheet .= "$selector.sy$group {{$styles}}\n";
4371             }
4372         }
4373         foreach ($this->language_data['STYLES']['STRINGS'] as $group => $styles) {
4374             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['STRINGS'])) {
4375                 // NEW: since 1.0.8 we have to handle hardquotes
4376                 if ($group === 'HARD') {
4377                     $group = '_h';
4378                 }
4379                 $stylesheet .= "$selector.st$group {{$styles}}\n";
4380             }
4381         }
4382         foreach ($this->language_data['STYLES']['NUMBERS'] as $group => $styles) {
4383             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['NUMBERS'])) {
4384                 $stylesheet .= "$selector.nu$group {{$styles}}\n";
4385             }
4386         }
4387         foreach ($this->language_data['STYLES']['METHODS'] as $group => $styles) {
4388             if ($styles != '' && (!$economy_mode || $this->lexic_permissions['METHODS'])) {
4389                 $stylesheet .= "$selector.me$group {{$styles}}\n";
4390             }
4391         }
4392         // note: neglect economy_mode, empty styles are meaningless
4393         foreach ($this->language_data['STYLES']['SCRIPT'] as $group => $styles) {
4394             if ($styles != '') {
4395                 $stylesheet .= "$selector.sc$group {{$styles}}\n";
4396             }
4397         }
4398         foreach ($this->language_data['STYLES']['REGEXPS'] as $group => $styles) {
4399             if ($styles != '' && (!$economy_mode ||
4400                 (isset($this->lexic_permissions['REGEXPS'][$group]) &&
4401                 $this->lexic_permissions['REGEXPS'][$group]))) {
4402                 if (is_array($this->language_data['REGEXPS'][$group]) &&
4403                     array_key_exists(GESHI_CLASS, $this->language_data['REGEXPS'][$group])) {
4404                     $stylesheet .= "$selector.";
4405                     $stylesheet .= $this->language_data['REGEXPS'][$group][GESHI_CLASS];
4406                     $stylesheet .= " {{$styles}}\n";
4407                 } else {
4408                     $stylesheet .= "$selector.re$group {{$styles}}\n";
4409                 }
4410             }
4411         }
4412         // Styles for lines being highlighted extra
4413         if (!$economy_mode || (count($this->highlight_extra_lines)!=count($this->highlight_extra_lines_styles))) {
4414             $stylesheet .= "{$selector}.ln-xtra, {$selector}li.ln-xtra, {$selector}div.ln-xtra {{$this->highlight_extra_lines_style}}\n";
4415         }
4416         $stylesheet .= "{$selector}span.xtra { display:block; }\n";
4417         foreach ($this->highlight_extra_lines_styles as $lineid => $linestyle) {
4418             $stylesheet .= "{$selector}.lx$lineid, {$selector}li.lx$lineid, {$selector}div.lx$lineid {{$linestyle}}\n";
4419         }
4420
4421         return $stylesheet;
4422     }
4423
4424     /**
4425      * Get's the style that is used for the specified line
4426      *
4427      * @param int The line number information is requested for
4428      * @access private
4429      * @since 1.0.7.21
4430      */
4431     function get_line_style($line) {
4432         //$style = null;
4433         $style = null;
4434         if (isset($this->highlight_extra_lines_styles[$line])) {
4435             $style = $this->highlight_extra_lines_styles[$line];
4436         } else { // if no "extra" style assigned
4437             $style = $this->highlight_extra_lines_style;
4438         }
4439
4440         return $style;
4441     }
4442
4443     /**
4444     * this functions creates an optimized regular expression list
4445     * of an array of strings.
4446     *
4447     * Example:
4448     * <code>$list = array('faa', 'foo', 'foobar');
4449     *          => string 'f(aa|oo(bar)?)'</code>
4450     *
4451     * @param $list array of (unquoted) strings
4452     * @param $regexp_delimiter your regular expression delimiter, @see preg_quote()
4453     * @return string for regular expression
4454     * @author Milian Wolff <mail@milianw.de>
4455     * @since 1.0.8
4456     * @access private
4457     */
4458     function optimize_regexp_list($list, $regexp_delimiter = '/') {
4459         $regex_chars = array('.', '\\', '+', '*', '?', '[', '^', ']', '$',
4460             '(', ')', '{', '}', '=', '!', '<', '>', '|', ':', $regexp_delimiter);
4461         sort($list);
4462         $regexp_list = array('');
4463         $num_subpatterns = 0;
4464         $list_key = 0;
4465
4466         // the tokens which we will use to generate the regexp list
4467         $tokens = array();
4468         $prev_keys = array();
4469         // go through all entries of the list and generate the token list
4470         $cur_len = 0;
4471         for ($i = 0, $i_max = count($list); $i < $i_max; ++$i) {
4472             if ($cur_len > GESHI_MAX_PCRE_LENGTH) {
4473                 // seems like the length of this pcre is growing exorbitantly
4474                 $regexp_list[++$list_key] = $this->_optimize_regexp_list_tokens_to_string($tokens);
4475                 $num_subpatterns = substr_count($regexp_list[$list_key], '(?:');
4476                 $tokens = array();
4477                 $cur_len = 0;
4478             }
4479             $level = 0;
4480             $entry = preg_quote((string) $list[$i], $regexp_delimiter);
4481             $pointer = &$tokens;
4482             // properly assign the new entry to the correct position in the token array
4483             // possibly generate smaller common denominator keys
4484             while (true) {
4485                 // get the common denominator
4486                 if (isset($prev_keys[$level])) {
4487                     if ($prev_keys[$level] == $entry) {
4488                         // this is a duplicate entry, skip it
4489                         continue 2;
4490                     }
4491                     $char = 0;
4492                     while (isset($entry[$char]) && isset($prev_keys[$level][$char])
4493                             && $entry[$char] == $prev_keys[$level][$char]) {
4494                         ++$char;
4495                     }
4496                     if ($char > 0) {
4497                         // this entry has at least some chars in common with the current key
4498                         if ($char == strlen($prev_keys[$level])) {
4499                             // current key is totally matched, i.e. this entry has just some bits appended
4500                             $pointer = &$pointer[$prev_keys[$level]];
4501                         } else {
4502                             // only part of the keys match
4503                             $new_key_part1 = substr($prev_keys[$level], 0, $char);
4504                             $new_key_part2 = substr($prev_keys[$level], $char);
4505
4506                             if (in_array($new_key_part1[0], $regex_chars)
4507                                 || in_array($new_key_part2[0], $regex_chars)) {
4508                                 // this is bad, a regex char as first character
4509                                 $pointer[$entry] = array('' => true);
4510                                 array_splice($prev_keys, $level, count($prev_keys), $entry);
4511                                 $cur_len += strlen($entry);
4512                                 continue;
4513                             } else {
4514                                 // relocate previous tokens
4515                                 $pointer[$new_key_part1] = array($new_key_part2 => $pointer[$prev_keys[$level]]);
4516                                 unset($pointer[$prev_keys[$level]]);
4517                                 $pointer = &$pointer[$new_key_part1];
4518                                 // recreate key index
4519                                 array_splice($prev_keys, $level, count($prev_keys), array($new_key_part1, $new_key_part2));
4520                                 $cur_len += strlen($new_key_part2);
4521                             }
4522                         }
4523                         ++$level;
4524                         $entry = substr($entry, $char);
4525                         continue;
4526                     }
4527                     // else: fall trough, i.e. no common denominator was found
4528                 }
4529                 if ($level == 0 && !empty($tokens)) {
4530                     // we can dump current tokens into the string and throw them away afterwards
4531                     $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
4532                     $new_subpatterns = substr_count($new_entry, '(?:');
4533                     if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + $new_subpatterns > GESHI_MAX_PCRE_SUBPATTERNS) {
4534                         $regexp_list[++$list_key] = $new_entry;
4535                         $num_subpatterns = $new_subpatterns;
4536                     } else {
4537                         if (!empty($regexp_list[$list_key])) {
4538                             $new_entry = '|' . $new_entry;
4539                         }
4540                         $regexp_list[$list_key] .= $new_entry;
4541                         $num_subpatterns += $new_subpatterns;
4542                     }
4543                     $tokens = array();
4544                     $cur_len = 0;
4545                 }
4546                 // no further common denominator found
4547                 $pointer[$entry] = array('' => true);
4548                 array_splice($prev_keys, $level, count($prev_keys), $entry);
4549
4550                 $cur_len += strlen($entry);
4551                 break;
4552             }
4553             unset($list[$i]);
4554         }
4555         // make sure the last tokens get converted as well
4556         $new_entry = $this->_optimize_regexp_list_tokens_to_string($tokens);
4557         if (GESHI_MAX_PCRE_SUBPATTERNS && $num_subpatterns + substr_count($new_entry, '(?:') > GESHI_MAX_PCRE_SUBPATTERNS) {
4558             $regexp_list[++$list_key] = $new_entry;
4559         } else {
4560             if (!empty($regexp_list[$list_key])) {
4561                 $new_entry = '|' . $new_entry;
4562             }
4563             $regexp_list[$list_key] .= $new_entry;
4564         }
4565         return $regexp_list;
4566     }
4567     /**
4568     * this function creates the appropriate regexp string of an token array
4569     * you should not call this function directly, @see $this->optimize_regexp_list().
4570     *
4571     * @param &$tokens array of tokens
4572     * @param $recursed bool to know wether we recursed or not
4573     * @return string
4574     * @author Milian Wolff <mail@milianw.de>
4575     * @since 1.0.8
4576     * @access private
4577     */
4578     function _optimize_regexp_list_tokens_to_string(&$tokens, $recursed = false) {
4579         $list = '';
4580         foreach ($tokens as $token => $sub_tokens) {
4581             $list .= $token;
4582             $close_entry = isset($sub_tokens['']);
4583             unset($sub_tokens['']);
4584             if (!empty($sub_tokens)) {
4585                 $list .= '(?:' . $this->_optimize_regexp_list_tokens_to_string($sub_tokens, true) . ')';
4586                 if ($close_entry) {
4587                     // make sub_tokens optional
4588                     $list .= '?';
4589                 }
4590             }
4591             $list .= '|';
4592         }
4593         if (!$recursed) {
4594             // do some optimizations
4595             // common trailing strings
4596             // BUGGY!
4597             //$list = preg_replace_callback('#(?<=^|\:|\|)\w+?(\w+)(?:\|.+\1)+(?=\|)#', create_function(
4598             //    '$matches', 'return "(?:" . preg_replace("#" . preg_quote($matches[1], "#") . "(?=\||$)#", "", $matches[0]) . ")" . $matches[1];'), $list);
4599             // (?:p)? => p?
4600             $list = preg_replace('#\(\?\:(.)\)\?#', '\1?', $list);
4601             // (?:a|b|c|d|...)? => [abcd...]?
4602             // TODO: a|bb|c => [ac]|bb
4603             static $callback_2;
4604             if (!isset($callback_2)) {
4605                 $callback_2 = create_function('$matches', 'return "[" . str_replace("|", "", $matches[1]) . "]";');
4606             }
4607             $list = preg_replace_callback('#\(\?\:((?:.\|)+.)\)#', $callback_2, $list);
4608         }
4609         // return $list without trailing pipe
4610         return substr($list, 0, -1);
4611     }
4612 } // End Class GeSHi
4613
4614
if (!function_exists('geshi_highlight')) {
    /**
     * Easy way to highlight stuff. Behaves just like highlight_string
     *
     * The code is highlighted without any header container and wrapped in a
     * <code> element.
     *
     * @param string The code to highlight
     * @param string The language to highlight the code in
     * @param string The path to the language files. You can leave this blank if you need
     *               as from version 1.0.7 the path should be automatically detected
     * @param boolean Whether to return the result or to echo
     * @return mixed The code highlighted (if $return is true); otherwise the
     *               code is echoed and false is returned if GeSHi reports an
     *               error, true if not
     * @since 1.0.2
     */
    function geshi_highlight($string, $language, $path = null, $return = false) {
        $geshi = new GeSHi($string, $language, $path);
        $geshi->set_header_type(GESHI_HEADER_NONE);

        $highlighted = '<code>' . $geshi->parse_code() . '</code>';
        if ($return) {
            return $highlighted;
        }

        echo $highlighted;

        // the error state is only reported when the result was echoed
        return !$geshi->error();
    }
}
4643
4644 ?>