Merge branch 'MDL-52763-30' of git://github.com/danpoltawski/moodle into MOODLE_30_STABLE
[moodle.git] / lib / searchlib.php
blob8fd87a9c9d07f880ddd76798901ceec773b23d3c
1 <?php
3 // This file is part of Moodle - http://moodle.org/
4 //
5 // Moodle is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // Moodle is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
18 /**
19 * @package core
20 * @subpackage search
21 * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 defined('MOODLE_INTERNAL') || die();
27 /** @see lexer.php */
28 require_once($CFG->libdir.'/lexer.php');
/**
 * Token type identifiers attached to each search_token.
 *
 * The values are numeric strings (not integers); search_generate_SQL()
 * compares them loosely, so the string form is kept as-is.
 */
define('TOKEN_USER', '0');
define('TOKEN_META', '1');
define('TOKEN_EXACT', '2');
define('TOKEN_NEGATE', '3');
define('TOKEN_STRING', '4');
define('TOKEN_USERID', '5');
define('TOKEN_DATEFROM', '6');
define('TOKEN_DATETO', '7');
define('TOKEN_INSTANCE', '8');
/**
 * Holds a single token type/value pair produced while parsing a search string.
 *
 * The value is sanitised once, at construction time, and is read-only afterwards.
 *
 * @package   moodlecore
 * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_token {
    /** @var string The token text after it has been passed through sanitize(). */
    private $value;

    /** @var string The token type; expected to be one of the TOKEN_* constants. */
    private $type;

    /**
     * Store the token type and the sanitised token value.
     *
     * @param string $type  Token type (one of the TOKEN_* constants).
     * @param string $value Raw text taken from the user's search string.
     */
    public function __construct($type, $value) {
        $this->type = $type;
        $this->value = $this->sanitize($value);
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * Kept so that legacy code calling the class-named constructor still works.
     *
     * @param string $type  Token type.
     * @param string $value Raw token text.
     */
    public function search_token($type, $value) {
        self::__construct($type, $value);
    }

    /**
     * Try to clean up user input to avoid potential security issues.
     *
     * The flags are pinned to ENT_COMPAT: the default flags of
     * htmlspecialchars() changed in PHP 8.1 (single quotes became encoded),
     * which would otherwise make the stored value, and therefore the search
     * behaviour, depend on the PHP version in use.
     *
     * @param string $userstring Raw text from the user.
     * @return string The text with &, <, > and double quotes converted to entities.
     */
    public function sanitize($userstring) {
        return htmlspecialchars($userstring, ENT_COMPAT);
    }

    /**
     * @return string The sanitised token value.
     */
    public function getValue() {
        return $this->value;
    }

    /**
     * @return string The token type given to the constructor.
     */
    public function getType() {
        return $this->type;
    }
}
/**
 * Lexer that splits a search string into the states handled by search_parser.
 *
 * Using a full-blown lexer is probably overkill for this application, but
 * might be useful for other tasks.
 *
 * @package   moodlecore
 * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_lexer extends Lexer {

    /**
     * Register every entry/exit pattern of the search state machine.
     *
     * @param object $parser Parser whose methods are named after the states
     *                       registered here (passed by reference to the parent).
     */
    public function __construct(&$parser) {
        // Let the base lexer attach itself to the parser first.
        parent::__construct($parser);

        // One row per construct: the pattern recognised while in the base
        // "accept" state, the state entered when it matches, and the pattern
        // that leaves that state again.
        // The rows are registered in exactly this order.
        // NOTE(review): presumably earlier patterns take precedence over the
        // catch-all "\S+" at the end - confirm against lexer.php before reordering.
        $transitions = array(
            // datefrom:foo
            array('datefrom:\S+', 'indatefrom', '\s'),
            // dateto:foo
            array('dateto:\S+', 'indateto', '\s'),
            // instance:foo
            array('instance:\S+', 'ininstance', '\s'),
            // userid:foo
            array('userid:\S+', 'inuserid', '\s'),
            // user:foo
            array('user:\S+', 'inusername', '\s'),
            // subject:foo (handled by the "inmeta" state)
            array('subject:\S+', 'inmeta', '\s'),
            // Required exact match strings (+foo)
            array('\+\S+', 'inrequired', '\s'),
            // Excluded strings (-foo)
            array('\-\S+', 'inexcluded', '\s'),
            // Quoted strings: everything up to, but not including, the closing quote
            array('"[^"]+', 'inquotedstring', '"'),
            // Ordinary, non-quoted words
            array('\S+', 'plainstring', '\s'),
        );

        foreach ($transitions as $transition) {
            list($entrypattern, $state, $exitpattern) = $transition;
            $this->addEntryPattern($entrypattern, 'accept', $state);
            $this->addExitPattern($exitpattern, $state);
        }
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @param object $parser Parser forwarded to __construct().
     */
    public function search_lexer(&$parser) {
        self::__construct($parser);
    }
}
/**
 * This class takes care of sticking the proper token type/value pairs into
 * the parsed token array.
 *
 * Most functions in this class should only be called by the lexer, the
 * one exception being get_parsed_array() which returns the result.
 *
 * @package   moodlecore
 * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_parser {
    /**
     * @var search_token[] Tokens emitted so far, in the order they were seen.
     *      Starts as an empty array so callers can always count()/iterate the
     *      result, even when the search string produced no tokens.
     */
    private $tokens = array();

    /**
     * This function is called by the code that's interested in the result of the parse operation.
     *
     * @return search_token[] The emitted tokens (empty array when none were emitted).
     */
    function get_parsed_array() {
        return $this->tokens;
    }

    /*
     * Functions below this are part of the state machine for the parse
     * operation and should not be called directly.
     */

    /**
     * Base state. No output emitted.
     *
     * @return bool Always true.
     */
    function accept() {
        return true;
    }

    /**
     * State for handling datefrom:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function indatefrom($content) {
        return $this->add_prefixed_token(TOKEN_DATEFROM, 'datefrom:', $content);
    }

    /**
     * State for handling dateto:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function indateto($content) {
        return $this->add_prefixed_token(TOKEN_DATETO, 'dateto:', $content);
    }

    /**
     * State for handling instance:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function ininstance($content) {
        return $this->add_prefixed_token(TOKEN_INSTANCE, 'instance:', $content);
    }

    /**
     * State for handling userid:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function inuserid($content) {
        return $this->add_prefixed_token(TOKEN_USERID, 'userid:', $content);
    }

    /**
     * State for handling user:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function inusername($content) {
        return $this->add_prefixed_token(TOKEN_USER, 'user:', $content);
    }

    /**
     * State for handling subject:foo constructs (emitted as TOKEN_META).
     * Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function inmeta($content) {
        return $this->add_prefixed_token(TOKEN_META, 'subject:', $content);
    }

    /**
     * State entered when we've seen a required string (+foo). Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function inrequired($content) {
        return $this->add_marked_token(TOKEN_EXACT, $content);
    }

    /**
     * State entered when we've seen an excluded string (-foo). Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function inexcluded($content) {
        return $this->add_marked_token(TOKEN_NEGATE, $content);
    }

    /**
     * State entered when we've seen a quoted string. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer, starting with the opening quote.
     * @return bool Always true.
     */
    function inquotedstring($content) {
        return $this->add_marked_token(TOKEN_STRING, $content);
    }

    /**
     * State entered when we've seen an ordinary, non-quoted word. Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    function plainstring($content) {
        if (trim($content) === '') { // State exit.
            return true;
        }
        // Add the string, untrimmed, to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_STRING, $content);
        return true;
    }

    /**
     * Emit a token for a "keyword:value" construct.
     *
     * Nothing is emitted when $content is not longer than the keyword itself
     * (state exit or missing parameter). Lengths are byte lengths, which is
     * sufficient because every keyword is plain ASCII.
     *
     * @param string $type    TOKEN_* constant for the emitted token.
     * @param string $prefix  The keyword including its trailing colon.
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    private function add_prefixed_token($type, $prefix, $content) {
        $prefixlength = strlen($prefix);
        if (strlen($content) <= $prefixlength) {
            return true;
        }
        // Strip off the keyword and add the trimmed remainder to the parsed token array.
        $this->tokens[] = new search_token($type, trim(substr($content, $prefixlength)));
        return true;
    }

    /**
     * Emit a token for a construct introduced by a single marker character
     * (+, - or an opening quote).
     *
     * Nothing is emitted when $content is shorter than two bytes (state exit
     * or missing parameter). The remainder is stored without trimming.
     *
     * @param string $type    TOKEN_* constant for the emitted token.
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    private function add_marked_token($type, $content) {
        if (strlen($content) < 2) {
            return true;
        }
        // Strip off the marker character and add the remainder to the parsed token array.
        $this->tokens[] = new search_token($type, substr($content, 1));
        return true;
    }
}
/**
 * Deprecated alias of search_generate_SQL().
 *
 * Emits a developer-level debugging notice and then forwards every argument,
 * unchanged and in the same order, to search_generate_SQL().
 *
 * @deprecated since Moodle 2.9 MDL-48939
 * @todo MDL-48940 This will be deleted in Moodle 3.2
 * @see search_generate_SQL()
 *
 * @param array  $parsetree          Tokens from a search_lexer/search_parser combination.
 * @param string $datafield          Forwarded to search_generate_SQL().
 * @param string $metafield          Forwarded to search_generate_SQL().
 * @param string $mainidfield        Forwarded to search_generate_SQL().
 * @param string $useridfield        Forwarded to search_generate_SQL().
 * @param string $userfirstnamefield Forwarded to search_generate_SQL().
 * @param string $userlastnamefield  Forwarded to search_generate_SQL().
 * @param string $timefield          Forwarded to search_generate_SQL().
 * @param string $instancefield      Forwarded to search_generate_SQL().
 * @return array|string Whatever search_generate_SQL() returns.
 */
function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                                  $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    debugging('search_generate_text_SQL() is deprecated, please use search_generate_SQL() instead.', DEBUG_DEVELOPER);

    $result = search_generate_SQL(
        $parsetree,
        $datafield,
        $metafield,
        $mainidfield,
        $useridfield,
        $userfirstnamefield,
        $userlastnamefield,
        $timefield,
        $instancefield
    );

    return $result;
}
/**
 * Primitive function to generate a SQL string from a parse tree.
 *
 * Every token contributes at most one parenthesised condition; the conditions
 * are joined with AND. Values are always passed as named placeholders
 * (sq0, sq1, ...) whose numbering continues across calls, so the fragments of
 * several calls can be combined in one query without name clashes.
 *
 * @global object $DB
 *
 * @param array  $parsetree          Tokens generated by a search_lexer/search_parser
 *                                   combination (objects offering getType()/getValue()).
 * @param string $datafield          Field searched by plain, exact and negated tokens.
 * @param string $metafield          Second field searched by those tokens and by
 *                                   subject: tokens; subject: tokens are ignored when
 *                                   this is an empty string.
 * @param string $mainidfield        Field compared with $useridfield for user: tokens.
 * @param string $useridfield        Field compared for userid: tokens.
 * @param string $userfirstnamefield Field matched by user: tokens.
 * @param string $userlastnamefield  Field matched by user: tokens.
 * @param string $timefield          Field compared for datefrom:/dateto: tokens.
 * @param string $instancefield      Field compared for instance: tokens.
 * @return array|string array($sql, $params) on success; an empty string when
 *                      $parsetree is empty or contains an unknown token type.
 */
function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    global $DB;
    static $p = 0;

    // Also covers a null parse tree, which count() would reject on newer PHP versions.
    if (empty($parsetree)) {
        return "";
    }

    // Loop invariant: ask the database driver only once.
    $regexsupported = $DB->sql_regex_supported();
    $regexp = $regexsupported ? $DB->sql_regex(true) : null;

    // Conditions are collected and joined at the end, so a token that emits
    // nothing (subject: without a meta field) cannot leave a dangling AND.
    $clauses = array();
    $params = array();

    foreach ($parsetree as $token) {
        $type = $token->getType();
        $value = $token->getValue();

        // Without regex support, transform exact-match searches into plain
        // string searches and trim +- chars.
        if (!$regexsupported) {
            $value = trim($value, '+-');
            if ($type == TOKEN_EXACT) {
                $type = TOKEN_STRING;
            }
        }

        // Two placeholder names are reserved per token, whether or not both are used.
        $name1 = 'sq'.$p++;
        $name2 = 'sq'.$p++;

        switch ($type) {
            case TOKEN_STRING:
                $clauses[] = "((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false)."))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_EXACT:
                $clauses[] = "(($datafield $regexp :$name1) OR ($metafield $regexp :$name2))";
                $params[$name1] = "[[:<:]]".$value."[[:>:]]";
                $params[$name2] = "[[:<:]]".$value."[[:>:]]";
                break;
            case TOKEN_META:
                if ($metafield != '') {
                    $clauses[] = "(".$DB->sql_like($metafield, ":$name1", false).")";
                    $params[$name1] = "%$value%";
                }
                break;
            case TOKEN_USER:
                $clauses[] = "(($mainidfield = $useridfield) AND ((".$DB->sql_like($userfirstnamefield, ":$name1", false).") OR (".$DB->sql_like($userlastnamefield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_USERID:
                $clauses[] = "($useridfield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_INSTANCE:
                $clauses[] = "($instancefield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATETO:
                $clauses[] = "($timefield <= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATEFROM:
                $clauses[] = "($timefield >= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_NEGATE:
                $clauses[] = "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR (".$DB->sql_like($metafield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            default:
                // Unknown token type: give up on the whole search.
                return '';
        }
    }

    return array(implode(' AND ', $clauses), $params);
}