Merge branch 'MDL-70075-39-2' of git://github.com/andrewnicols/moodle into MOODLE_39_...
[moodle.git] / lib / searchlib.php
blob440eb4b9f29a95b874182bab76ed16042d8f5b4a
<?php

// This file is part of Moodle - http://moodle.org/
//
// Moodle is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Moodle is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * Search string tokenising helpers and SQL clause generation.
 *
 * Declares the token type constants, the token/lexer/parser classes that
 * turn a user search string into tokens, and the functions that turn those
 * tokens into a parameterised SQL condition.
 *
 * @package core
 * @subpackage search
 * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */

defined('MOODLE_INTERNAL') || die();

/** @see lexer.php */
require_once($CFG->libdir.'/lexer.php');

/** Constants for the various types of tokens */

// Emitted for "user:foo" (matched against first/last name fields).
define("TOKEN_USER","0");
// Emitted for "subject:foo" (matched against the meta field only).
define("TOKEN_META","1");
// Emitted for "+foo" (required whole-word match).
define("TOKEN_EXACT","2");
// Emitted for "-foo" (excluded string).
define("TOKEN_NEGATE","3");
// Emitted for plain words and for "quoted strings".
define("TOKEN_STRING","4");
// Emitted for "userid:foo".
define("TOKEN_USERID","5");
// Emitted for "datefrom:foo".
define("TOKEN_DATEFROM","6");
// Emitted for "dateto:foo".
define("TOKEN_DATETO","7");
// Emitted for "instance:foo".
define("TOKEN_INSTANCE","8");
// Emitted for "tags:foo,bar".
define("TOKEN_TAGS","9");
/**
 * Class to hold token/value pairs after they're parsed.
 *
 * The value is passed through {@see search_token::sanitize()} once, at
 * construction time, so getValue() always returns the HTML-escaped form.
 *
 * @package moodlecore
 * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_token {
    /** @var string The sanitised token value. */
    private $value;

    /** @var string One of the TOKEN_* constants. */
    private $type;

    /**
     * Build a token.
     *
     * @param string $type One of the TOKEN_* constants.
     * @param string $value Raw text taken from the search string.
     */
    public function __construct($type,$value){
        $this->type = $type;
        $this->value = $this->sanitize($value);
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function search_token($type, $value) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($type, $value);
    }

    /**
     * Try to clean up user input to avoid potential security issues.
     * Need to think about this some more.
     *
     * @param string|null $userstring Raw user input.
     * @return string The input with HTML special characters escaped.
     */
    public function sanitize($userstring){
        // A null value is treated as an empty string: passing null to
        // htmlspecialchars() is deprecated as of PHP 8.1.
        return htmlspecialchars($userstring ?? '');
    }

    /**
     * @return string The sanitised token value.
     */
    public function getValue(){
        return $this->value;
    }

    /**
     * @return string The token type given to the constructor.
     */
    public function getType(){
        return $this->type;
    }
}
/**
 * This class does the heavy lifting of lexing the search string into tokens.
 * Using a full-blown lexer is probably overkill for this application, but
 * might be useful for other tasks.
 *
 * @package moodlecore
 * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_lexer extends Lexer{

    /**
     * Set up the state machine and the pattern matches for its transitions.
     *
     * Every construct is handled the same way: an entry pattern moves the
     * lexer from the base "accept" state into a construct-specific state,
     * and an exit pattern returns it to "accept".
     *
     * @param object $parser Parser handed to the parent Lexer constructor;
     *      presumably it receives one callback per state name (the state
     *      names match the search_parser method names) — confirm against
     *      lexer.php.
     */
    public function __construct(&$parser){

        // Call parent constructor.
        parent::__construct($parser);

        // Each row is: entry pattern (seen in "accept"), state entered,
        // exit pattern (seen in that state).
        // The rows are registered in exactly this order. NOTE(review):
        // presumably earlier patterns take precedence, which is why the
        // catch-all plain word pattern is last — confirm against lexer.php.
        $transitions = array(
            // datefrom:foo, up to the next whitespace.
            array('datefrom:\S+', 'indatefrom', '\s'),
            // tags:foo,bar, up to the next whitespace.
            array('tags:\S+', 'intags', '\s'),
            // dateto:foo, up to the next whitespace.
            array('dateto:\S+', 'indateto', '\s'),
            // instance:foo, up to the next whitespace.
            array('instance:\S+', 'ininstance', '\s'),
            // userid:foo, up to the next whitespace.
            array('userid:\S+', 'inuserid', '\s'),
            // user:foo, up to the next whitespace.
            array('user:\S+', 'inusername', '\s'),
            // subject:foo (the "meta" token), up to the next whitespace.
            array('subject:\S+', 'inmeta', '\s'),
            // Required exact match strings (+foo).
            array('\+\S+', 'inrequired', '\s'),
            // Excluded strings (-foo).
            array('\-\S+', 'inexcluded', '\s'),
            // Quoted strings: everything from the opening quote up to,
            // but not including, the closing quote.
            array('"[^"]+', 'inquotedstring', '"'),
            // Ordinary, nonquoted words.
            array('\S+', 'plainstring', '\s'),
        );

        foreach ($transitions as $transition) {
            list($entrypattern, $state, $exitpattern) = $transition;
            $this->addEntryPattern($entrypattern, 'accept', $state);
            $this->addExitPattern($exitpattern, $state);
        }
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function search_lexer(&$parser) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($parser);
    }
}
/**
 * This class takes care of sticking the proper token type/value pairs into
 * the parsed token array.
 * Most functions in this class should only be called by the lexer, the
 * one exception being get_parsed_array() which returns the result.
 *
 * @package moodlecore
 * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_parser {
    /**
     * @var search_token[] Tokens emitted so far, in emission order.
     *      Starts as an empty array so that callers can always count() the
     *      result, even when the search string produced no tokens.
     */
    private $tokens = array();

    /**
     * This function is called by the code that's interested in the result
     * of the parse operation.
     *
     * @return search_token[] The tokens emitted so far (possibly empty).
     */
    public function get_parsed_array(){
        return $this->tokens;
    }

    /*
     * Functions below this are part of the state machine for the parse
     * operation and should not be called directly.
     */

    /**
     * Base state. No output emitted.
     *
     * @return bool Always true.
     */
    public function accept() {
        return true;
    }

    /**
     * State for handling datefrom:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function indatefrom($content){
        return $this->emit_after_prefix($content, 9, TOKEN_DATEFROM);
    }

    /**
     * State for handling dateto:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function indateto($content){
        return $this->emit_after_prefix($content, 7, TOKEN_DATETO);
    }

    /**
     * State for handling tags:tagname,tagname constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function intags($content){
        // "tags:" is 5 characters; a bare "tags:" has no parameter and
        // emits nothing, matching the other prefixed states.
        return $this->emit_after_prefix($content, 5, TOKEN_TAGS);
    }

    /**
     * State for handling instance:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function ininstance($content){
        return $this->emit_after_prefix($content, 9, TOKEN_INSTANCE);
    }

    /**
     * State for handling userid:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inuserid($content){
        return $this->emit_after_prefix($content, 7, TOKEN_USERID);
    }

    /**
     * State for handling user:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inusername($content){
        return $this->emit_after_prefix($content, 5, TOKEN_USER);
    }

    /**
     * State for handling subject:foo constructs (the "meta" token).
     * Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inmeta($content){
        return $this->emit_after_prefix($content, 8, TOKEN_META);
    }

    /**
     * State entered when we've seen a required string (+foo). Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inrequired($content){
        // Strip off the + sign; the remainder is kept untrimmed.
        return $this->emit_after_prefix($content, 1, TOKEN_EXACT, false);
    }

    /**
     * State entered when we've seen an excluded string (-foo). Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inexcluded($content){
        // Strip off the - sign; the remainder is kept untrimmed.
        return $this->emit_after_prefix($content, 1, TOKEN_NEGATE, false);
    }

    /**
     * State entered when we've seen a quoted string. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inquotedstring($content){
        // Strip off the opening quote; inner whitespace is significant,
        // so the remainder is kept untrimmed.
        return $this->emit_after_prefix($content, 1, TOKEN_STRING, false);
    }

    /**
     * State entered when we've seen an ordinary, non-quoted word. Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function plainstring($content){
        if (trim($content) === '') { // State exit.
            return true;
        }
        // Add the string to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_STRING,$content);
        return true;
    }

    /**
     * Shared body of the prefixed states: drop the first $prefixlength
     * bytes of $content and emit the remainder as a token of type $type.
     *
     * Nothing is emitted when $content is no longer than the prefix, which
     * covers both the state-exit callback and a construct with a missing
     * parameter.
     *
     * @param string $content Text handed over by the lexer.
     * @param int $prefixlength Length in bytes of the prefix to strip.
     * @param string $type One of the TOKEN_* constants.
     * @param bool $trim Whether to trim() the remainder before emitting it.
     * @return bool Always true.
     */
    private function emit_after_prefix($content, $prefixlength, $type, $trim = true) {
        if (strlen($content) <= $prefixlength) {
            return true;
        }
        $param = substr($content, $prefixlength);
        if ($trim) {
            $param = trim($param);
        }
        $this->tokens[] = new search_token($type, $param);
        return true;
    }
}
/**
 * Primitive function to generate a SQL string from a parse tree
 * using TEXT indexes. If searches aren't suitable to use TEXT
 * this function calls the default search_generate_SQL() one.
 *
 * As written, it always emits a developer debugging message and delegates
 * to search_generate_SQL() with the same arguments.
 *
 * @deprecated since Moodle 2.9 MDL-48939
 * @todo MDL-48940 This will be deleted in Moodle 3.2
 * @see search_generate_SQL()
 */
function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                                  $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    debugging('search_generate_text_SQL() is deprecated, please use search_generate_SQL() instead.', DEBUG_DEVELOPER);

    return search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                               $userfirstnamefield, $userlastnamefield, $timefield, $instancefield);
}
/**
 * Primitive function to generate a SQL string from a parse tree.
 *
 * One condition is produced per token and the conditions are joined with
 * AND. A token that contributes no condition (a subject: token when
 * $metafield is empty, or a tags: token with no usable tag field) is
 * skipped entirely, so the result never contains a dangling AND.
 *
 * @param array|null $parsetree Tokens generated by a search_lexer/search_parser
 *      combination; each must provide getType() and getValue().
 * @param string $datafield SQL field searched by plain, exact and negated tokens.
 * @param string $metafield SQL field searched alongside $datafield, and on its
 *      own by subject: tokens. May be '' to ignore subject: tokens.
 * @param string $mainidfield SQL field compared with $useridfield for user: tokens.
 * @param string $useridfield SQL field holding the user id.
 * @param string $userfirstnamefield SQL field holding the user's first name.
 * @param string $userlastnamefield SQL field holding the user's last name.
 * @param string $timefield SQL field compared by datefrom: and dateto: tokens.
 * @param string $instancefield SQL field compared by instance: tokens.
 * @param array $tagfields SQL fields for tags, one consumed per tag in the
 *      order the tags appear across all tags: tokens.
 * @return array|string array($sql, $params) with named parameters, or an
 *      empty string when $parsetree is empty or holds an unknown token type.
 */
function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield,
                             $tagfields = []) {
    global $DB;
    // Running counter so parameter names stay unique across calls in one request.
    static $p = 0;

    // Nothing to search for. empty() also covers a null parse tree, which
    // count() would reject on PHP 8.
    if (empty($parsetree)) {
        return "";
    }

    $regexsupported = $DB->sql_regex_supported();
    $regexp = $regexsupported ? $DB->sql_regex(true) : null;

    $params = array();
    $clauses = array();
    $nexttagfield = 0;
    $stoppedprocessingtags = false;

    foreach ($parsetree as $token) {
        $type = $token->getType();
        $value = $token->getValue();

        // Without regex support, transform EXACT searches into STRING
        // searches and trim +- chars.
        if (!$regexsupported) {
            $value = trim($value, '+-');
            if ($type == TOKEN_EXACT) {
                $type = TOKEN_STRING;
            }
        }

        // Two names are reserved per token whether or not both are used.
        $name1 = 'sq'.$p++;
        $name2 = 'sq'.$p++;

        switch ($type) {
            case TOKEN_STRING:
                $clauses[] = "((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false)."))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_EXACT:
                // NOTE(review): $value is wrapped in word-boundary markers but is
                // not regex-escaped here — confirm whether that is intended.
                $clauses[] = "(($datafield $regexp :$name1) OR ($metafield $regexp :$name2))";
                $params[$name1] = "[[:<:]]".$value."[[:>:]]";
                $params[$name2] = "[[:<:]]".$value."[[:>:]]";
                break;
            case TOKEN_META:
                // With no meta field there is nothing to match against.
                if ($metafield != '') {
                    $clauses[] = "(".$DB->sql_like($metafield, ":$name1", false).")";
                    $params[$name1] = "%$value%";
                }
                break;
            case TOKEN_USER:
                $clauses[] = "(($mainidfield = $useridfield) AND ((".$DB->sql_like($userfirstnamefield, ":$name1", false).") OR (".$DB->sql_like($userlastnamefield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_USERID:
                $clauses[] = "($useridfield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_INSTANCE:
                $clauses[] = "($instancefield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATETO:
                $clauses[] = "($timefield <= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATEFROM:
                $clauses[] = "($timefield >= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_TAGS:
                $sqlstrings = [];
                foreach (explode(',', $value) as $tag) {
                    $paramname = $name1 . '_' . $nexttagfield;
                    if (isset($tagfields[$nexttagfield])) {
                        $sqlstrings[] = '(' . $tagfields[$nexttagfield] . ' = :' . $paramname . ')';
                        $params[$paramname] = $tag;
                    } else if (!$stoppedprocessingtags) {
                        // Warn only the first time we run out of tag fields.
                        $stoppedprocessingtags = true;
                        \core\notification::add(get_string('toomanytags'), \core\notification::WARNING);
                    }
                    $nexttagfield++;
                }
                if ($sqlstrings) {
                    $clauses[] = implode(' AND ', $sqlstrings);
                }
                break;
            case TOKEN_NEGATE:
                $clauses[] = "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            default:
                // Unknown token type: give up on the whole search.
                return '';
        }
    }

    return array(implode(' AND ', $clauses), $params);
}