Merge branch 'MOODLE_30_STABLE' of https://github.com/yuriy-os/moodle into MOODLE_30_...
[moodle.git] / lib / lexer.php
blob6a21885d2e08a5e39e52a62e1fbe4b6acfa7d53f
1 <?php
/**
 * PHP lexer code snarfed from the CVS tree for the lamplib project at
 * http://sourceforge.net/projects/lamplib
 * This project is administered by Markus Baker, Harry Fuecks and Matt
 * Mitchell, and the project code is in the public domain.
 *
 * Thanks, guys!
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
/** Event code 1: the matched token entered a new lexer mode. */
define('LEXER_ENTER', 1);
/** Event code 2: a token matched without any change of mode. */
define('LEXER_MATCHED', 2);
/** Event code 3: text that no pattern recognised. */
define('LEXER_UNMATCHED', 3);
/** Event code 4: the matched token left the current lexer mode. */
define('LEXER_EXIT', 4);
/** Event code 5: the token was handled in a one-off special mode. */
define('LEXER_SPECIAL', 5);
/**
 * Compounded regular expression. Any of the contained patterns
 * could match and when one does, its label is returned.
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
class ParallelRegex {
    /** @var array Raw patterns exactly as given to addPattern(). */
    var $_patterns;
    /** @var array Labels, index-aligned with $_patterns. */
    var $_labels;
    /** @var string|null Cached compound regex, or null when it must be rebuilt. */
    var $_regex;
    /** @var bool True for case sensitive matching. */
    var $_case;

    /**
     * Constructor. Starts with no patterns.
     * @param bool $case    True for case sensitive, false
     *                      for insensitive.
     * @access public
     */
    public function __construct($case) {
        $this->_case = $case;
        $this->_patterns = array();
        $this->_labels = array();
        $this->_regex = null;
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     * @param bool $case    True for case sensitive, false
     *                      for insensitive.
     */
    public function ParallelRegex($case) {
        self::__construct($case);
    }

    /**
     * Adds a pattern with an optional label.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $label        Label of regex to be returned
     *                             on a match.
     * @access public
     */
    function addPattern($pattern, $label = true) {
        $count = count($this->_patterns);
        $this->_patterns[$count] = $pattern;
        $this->_labels[$count] = $label;
        // Invalidate the cache so the next match() recompiles with this pattern.
        $this->_regex = null;
    }

    /**
     * Attempts to match all patterns at once against
     * a string.
     * @param string $subject      String to match against.
     * @param string $match        First matched portion of
     *                             subject. Set to "" whenever
     *                             nothing matched.
     * @return bool|string         False if nothing matched, otherwise
     *                             the label of the pattern that matched
     *                             (true for unlabelled patterns and for
     *                             empty matches).
     * @access public
     */
    function match($subject, &$match) {
        // Give the by-reference output a defined value on every failure path.
        $match = "";
        if (count($this->_patterns) == 0) {
            return false;
        }
        if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
            return false;
        }
        $match = $matches[0];
        $groups = count($matches);
        for ($i = 1; $i < $groups; $i++) {
            // Groups that did not take part in the match are reported as "".
            // Compare against "" explicitly: a plain truthiness test would
            // also reject a matched token whose text is "0".
            if ($matches[$i] !== "") {
                return $this->_labels[$i - 1];
            }
        }
        return true;
    }

    /**
     * Compounds the patterns into a single
     * regular expression separated with the
     * "or" operator. Caches the regex.
     * Will automatically escape (, ) and / tokens.
     * @return string    The compound regex, delimiters and flags included.
     * @access private
     */
    function _getCompoundedRegex() {
        if ($this->_regex === null) {
            // Build the capture groups in a local array. The stored patterns
            // must stay untouched, otherwise a rebuild after a later
            // addPattern() would escape the already wrapped patterns again.
            $groups = array();
            foreach ($this->_patterns as $pattern) {
                $groups[] = '(' . str_replace(
                        array('/', '(', ')'),
                        array('\/', '\(', '\)'),
                        $pattern) . ')';
            }
            $this->_regex = "/" . implode("|", $groups) . "/" . $this->_getPerlMatchingFlags();
        }
        return $this->_regex;
    }

    /**
     * Accessor for perl regex mode flags to use.
     * @return string       Flags as string.
     * @access private
     */
    function _getPerlMatchingFlags() {
        return ($this->_case ? "msS" : "msSi");
    }
}
/**
 * Stack of named states for a stack machine. The entry on top
 * of the stack is the current state.
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
class StateStack {
    /** @var array State names; the last element is the current state. */
    var $_stack;

    /**
     * Constructor. The stack begins holding only the starting state.
     * @param string $start        Starting state name.
     * @access public
     */
    public function __construct($start) {
        $this->_stack = array();
        $this->_stack[] = $start;
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     * @param string $start        Starting state name.
     */
    public function StateStack($start) {
        self::__construct($start);
    }

    /**
     * Reports the state currently on top of the stack.
     * @return string       State as string.
     * @access public
     */
    function getCurrent() {
        $top = count($this->_stack) - 1;
        return $this->_stack[$top];
    }

    /**
     * Pushes a state, which thereby becomes the current state.
     * @param string $state        New state.
     * @access public
     */
    function enter($state) {
        $this->_stack[] = $state;
    }

    /**
     * Pops the current state so the previous one becomes current.
     * The last remaining state is never removed.
     * @return bool    False if we would drop off
     *                 the bottom of the list.
     * @access public
     */
    function leave() {
        if (count($this->_stack) != 1) {
            array_pop($this->_stack);
            return true;
        }
        return false;
    }
}
/**
 * Accepts text and breaks it into tokens.
 * Some optimisation to make sure the
 * content is only scanned by the PHP regex
 * parser once. Lexer modes must not start
 * with leading underscores.
 *
 * @package   moodlecore
 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
 */
class Lexer {
    /** @var array Map of mode name to the ParallelRegex used in that mode. */
    var $_regexes;
    /** @var object Parser whose handler methods receive the tokens. */
    var $_parser;
    /** @var StateStack Stack of lexer modes; the top entry is the current mode. */
    var $_mode;
    /** @var array Map of mode name to the name of the handler method to call. */
    var $_mode_handlers;
    /** @var bool True for case sensitive matching. */
    var $_case;

    /**
     * Sets up the lexer in case insensitive matching
     * by default.
     * @param object $parser     Handling strategy by
     *                           reference.
     * @param string $start      Starting handler.
     * @param bool $case         True for case sensitive.
     * @access public
     */
    public function __construct(&$parser, $start = "accept", $case = false) {
        $this->_parser = &$parser;
        $this->_case = $case;
        $this->_mode = new StateStack($start);
        $this->_regexes = array();
        $this->_mode_handlers = array();
    }

    /**
     * Old syntax of class constructor for backward compatibility.
     * @param object $parser     Handling strategy by
     *                           reference.
     * @param string $start      Starting handler.
     * @param bool $case         True for case sensitive.
     */
    public function Lexer(&$parser, $start = "accept", $case = false) {
        self::__construct($parser, $start, $case);
    }

    /**
     * Adds a token search pattern for a particular
     * parsing mode. The pattern does not change the
     * current mode.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     * @access public
     */
    function addPattern($pattern, $mode = "accept") {
        $this->_regexForMode($mode)->addPattern($pattern);
    }

    /**
     * Adds a pattern that will enter a new parsing
     * mode. Useful for entering parenthesis, strings,
     * tags, etc.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     * @param string $new_mode     Change parsing to this new
     *                             nested mode.
     * @access public
     */
    function addEntryPattern($pattern, $mode, $new_mode) {
        $this->_regexForMode($mode)->addPattern($pattern, $new_mode);
    }

    /**
     * Adds a pattern that will exit the current mode
     * and re-enter the previous one.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Mode to leave.
     * @access public
     */
    function addExitPattern($pattern, $mode) {
        $this->_regexForMode($mode)->addPattern($pattern, "__exit");
    }

    /**
     * Adds a pattern that has a special mode.
     * Acts as an entry and exit pattern in one go.
     * @param string $pattern      Perl style regex, but ( and )
     *                             lose the usual meaning.
     * @param string $mode         Should only apply this
     *                             pattern when dealing with
     *                             this type of input.
     * @param string $special      Use this mode for this one token.
     * @access public
     */
    function addSpecialPattern($pattern, $mode, $special) {
        $this->_regexForMode($mode)->addPattern($pattern, "_$special");
    }

    /**
     * Adds a mapping from a mode to another handler.
     * @param string $mode        Mode to be remapped.
     * @param string $handler     New target handler.
     * @access public
     */
    function mapHandler($mode, $handler) {
        $this->_mode_handlers[$mode] = $handler;
    }

    /**
     * Splits the page text into tokens. Will fail
     * if the handlers report an error or if no
     * content is consumed. If successful then each
     * unparsed and parsed token invokes a call to the
     * held listener.
     * @param string $raw        Raw HTML text.
     * @return bool              True on success, else false.
     * @access public
     */
    function parse($raw) {
        if (!isset($this->_parser)) {
            return false;
        }
        $remaining = strlen($raw);
        while (true) {
            $parsed = $this->_reduce($raw);
            if (!is_array($parsed)) {
                break;
            }
            if (!$this->_dispatchTokens($parsed[0], $parsed[1], $parsed[2])) {
                return false;
            }
            // A pass that consumed nothing would loop forever, so give up.
            $left = strlen($raw);
            if ($left == $remaining) {
                return false;
            }
            $remaining = $left;
        }
        // _reduce() signals a parsing error with false and "no more matches" with true.
        if (!$parsed) {
            return false;
        }
        // Whatever is left over is handed to the parser as unmatched text.
        return $this->_invokeParser($raw, LEXER_UNMATCHED);
    }

    /**
     * Sends the matched token and any leading unmatched
     * text to the parser changing the lexer to a new
     * mode if one is listed.
     * @param string $unmatched    Unmatched leading portion.
     * @param string $matched      Actual token match.
     * @param string $mode         Mode after match. The "__exit"
     *                             mode causes a stack pop. A
     *                             false mode causes no change.
     * @return bool                False if there was any error
     *                             from the parser.
     * @access private
     */
    function _dispatchTokens($unmatched, $matched, $mode = false) {
        if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
            return false;
        }
        // Exit pattern: report the token in the mode being left, then pop.
        if ($mode === "__exit") {
            return $this->_invokeParser($matched, LEXER_EXIT) ? $this->_mode->leave() : false;
        }
        // Special pattern (label prefixed with "_"): enter the mode for this one token only.
        if (strncmp($mode, "_", 1) == 0) {
            $this->_mode->enter(substr($mode, 1));
            return $this->_invokeParser($matched, LEXER_SPECIAL) ? $this->_mode->leave() : false;
        }
        // Any other string label is an entry pattern; a non-string label leaves the mode alone.
        $event = LEXER_MATCHED;
        if (is_string($mode)) {
            $this->_mode->enter($mode);
            $event = LEXER_ENTER;
        }
        return $this->_invokeParser($matched, $event);
    }

    /**
     * Calls the parser method named after the current
     * mode. Empty content will be ignored.
     * @param string $content        Text parsed.
     * @param string $is_match       Token is recognised rather
     *                               than unparsed data.
     * @return mixed                 True for ignored empty content, otherwise
     *                               whatever the parser handler returns.
     * @access private
     */
    function _invokeParser($content, $is_match) {
        if ($content === "" || $content === false) {
            return true;
        }
        $current = $this->_mode->getCurrent();
        // A mode remapped through mapHandler() is routed to its substitute handler.
        $handler = isset($this->_mode_handlers[$current]) ? $this->_mode_handlers[$current] : $current;
        return $this->_parser->$handler($content, $is_match);
    }

    /**
     * Tries to match a chunk of text and if successful
     * removes the recognised chunk and any leading
     * unparsed data. Empty strings will not be matched.
     * @param string $raw  The subject to parse. This is the
     *                     content that will be eaten.
     * @return bool|array  Three item list of unparsed
     *                     content followed by the
     *                     recognised token and finally the
     *                     action the parser is to take.
     *                     True if no match, false if there
     *                     is a parsing error.
     * @access private
     */
    function _reduce(&$raw) {
        $current = $this->_mode->getCurrent();
        if (!isset($this->_regexes[$current])) {
            return false;
        }
        if ($raw === "") {
            return true;
        }
        $action = $this->_regexes[$current]->match($raw, $match);
        if (!$action) {
            return true;
        }
        // NOTE(review): the token is located by searching for its text, which
        // assumes the first occurrence of that text is where the regex matched.
        // Confirm this holds for patterns using anchors or word boundaries.
        $offset = strpos($raw, $match);
        $unparsed = substr($raw, 0, $offset);
        $raw = substr($raw, $offset + strlen($match));
        return array($unparsed, $match, $action);
    }

    /**
     * Fetches the compound regex of a mode, creating
     * an empty one on first use.
     * @param string $mode         Mode whose patterns are wanted.
     * @return ParallelRegex       Regex collection for that mode.
     * @access private
     */
    function _regexForMode($mode) {
        if (!isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        return $this->_regexes[$mode];
    }
}