Merge branch 'MDL-43230-master' of git://github.com/ryanwyllie/moodle
[moodle.git] / lib / searchlib.php
blob1393cd4dd326600284a24276af42b8488533ee0b
1 <?php
3 // This file is part of Moodle - http://moodle.org/
4 //
5 // Moodle is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // Moodle is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
18 /**
19 * @package core
20 * @subpackage search
21 * @copyright 1999 onwards Martin Dougiamas {@link http://moodle.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 defined('MOODLE_INTERNAL') || die();
27 /** @see lexer.php */
28 require_once($CFG->libdir.'/lexer.php');
/**
 * Constants for the various types of tokens.
 *
 * Each search_token carries one of these values as its type. The values are
 * numeric strings, not integers.
 */
define('TOKEN_USER', '0');
define('TOKEN_META', '1');
define('TOKEN_EXACT', '2');
define('TOKEN_NEGATE', '3');
define('TOKEN_STRING', '4');
define('TOKEN_USERID', '5');
define('TOKEN_DATEFROM', '6');
define('TOKEN_DATETO', '7');
define('TOKEN_INSTANCE', '8');
/**
 * Class to hold token/value pairs after they're parsed.
 *
 * The value is passed through sanitize() once, at construction time, and is
 * only ever exposed in that sanitized form.
 *
 * @package   moodlecore
 * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_token {
    /** @var string The sanitized token text. */
    private $value;

    /** @var string The token type, one of the TOKEN_* constants. */
    private $type;

    /**
     * Builds a token of the given type.
     *
     * @param string $type One of the TOKEN_* constants.
     * @param string $value Raw text of the token; stored after sanitize().
     */
    public function __construct($type, $value) {
        $this->value = $this->sanitize($value);
        $this->type = $type;
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * Emits a developer-level debugging message and forwards to __construct().
     *
     * @deprecated since Moodle 3.1
     */
    public function search_token($type, $value) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($type, $value);
    }

    /**
     * Try to clean up user input to avoid potential security issues.
     * Need to think about this some more.
     *
     * @param string $userstring Raw text taken from the search query.
     * @return string The text with HTML special characters converted to entities.
     */
    public function sanitize($userstring) {
        $cleaned = htmlspecialchars($userstring);
        return $cleaned;
    }

    /**
     * @return string The sanitized token text.
     */
    public function getValue() {
        return $this->value;
    }

    /**
     * @return string The token type given to the constructor.
     */
    public function getType() {
        return $this->type;
    }
}
/**
 * This class does the heavy lifting of lexing the search string into tokens.
 * Using a full-blown lexer is probably overkill for this application, but
 * might be useful for other tasks.
 *
 * @package   moodlecore
 * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_lexer extends Lexer {

    /**
     * Sets up the state machine and the pattern matches for its transitions.
     *
     * @param object $parser Passed by reference to the parent Lexer constructor;
     *                       presumably a search_parser whose methods are named
     *                       after the states registered here - confirm against Lexer.
     */
    public function __construct(&$parser) {

        // Call parent constructor.
        parent::__construct($parser);

        // One row per lexer state: the pattern that enters the state from the
        // base "accept" state, the state name, and the pattern that leaves it.
        // Rows are registered strictly in this order, entry pattern first and
        // exit pattern second, exactly as the hand-written sequence did.
        // NOTE(review): pattern precedence presumably follows registration
        // order (the catch-all \S+ is deliberately last) - confirm in lexer.php.
        $transitions = array(
            // datefrom:foo - leave the state at the next whitespace.
            array("datefrom:\S+", "indatefrom", "\s"),
            // dateto:foo.
            array("dateto:\S+", "indateto", "\s"),
            // instance:foo.
            array("instance:\S+", "ininstance", "\s"),
            // userid:foo.
            array("userid:\S+", "inuserid", "\s"),
            // user:foo.
            array("user:\S+", "inusername", "\s"),
            // subject:foo (handled by the "inmeta" state).
            array("subject:\S+", "inmeta", "\s"),
            // Required exact match strings (+foo).
            array("\+\S+", "inrequired", "\s"),
            // Excluded strings (-foo).
            array("\-\S+", "inexcluded", "\s"),
            // Quoted strings: grab everything up to the closing quote,
            // which is what returns us to the base state.
            array("\"[^\"]+", "inquotedstring", "\""),
            // Ordinary, nonquoted words: everything up to the next whitespace.
            array("\S+", "plainstring", "\s"),
        );

        foreach ($transitions as $transition) {
            list($entrypattern, $state, $exitpattern) = $transition;
            $this->addEntryPattern($entrypattern, "accept", $state);
            $this->addExitPattern($exitpattern, $state);
        }
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * Emits a developer-level debugging message and forwards to __construct().
     *
     * @deprecated since Moodle 3.1
     */
    public function search_lexer(&$parser) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($parser);
    }
}
/**
 * This class takes care of sticking the proper token type/value pairs into
 * the parsed token array.
 * Most functions in this class should only be called by the lexer, the
 * one exception being get_parsed_array() which returns the result.
 *
 * @package   moodlecore
 * @copyright 1999 onwards Martin Dougiamas  {@link http://moodle.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class search_parser {
    /**
     * @var search_token[] Tokens emitted so far, in the order they were seen.
     *
     * Initialised to an empty array so that a parse which emits nothing still
     * yields something countable (count(null) is a TypeError on PHP 8).
     */
    private $tokens = array();

    /**
     * This function is called by the code that's interested in the result of the parse operation.
     *
     * @return search_token[] The emitted tokens; an empty array if none were emitted.
     */
    public function get_parsed_array() {
        return $this->tokens;
    }

    /*
     * Functions below this are part of the state machine for the parse
     * operation and should not be called directly.
     */

    /**
     * Base state. No output emitted.
     *
     * @return bool Always true.
     */
    public function accept() {
        return true;
    }

    /**
     * State for handling datefrom:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function indatefrom($content) {
        $this->add_keyword_token(TOKEN_DATEFROM, 'datefrom:', $content);
        return true;
    }

    /**
     * State for handling dateto:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function indateto($content) {
        $this->add_keyword_token(TOKEN_DATETO, 'dateto:', $content);
        return true;
    }

    /**
     * State for handling instance:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function ininstance($content) {
        $this->add_keyword_token(TOKEN_INSTANCE, 'instance:', $content);
        return true;
    }

    /**
     * State for handling userid:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inuserid($content) {
        $this->add_keyword_token(TOKEN_USERID, 'userid:', $content);
        return true;
    }

    /**
     * State for handling user:foo constructs. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inusername($content) {
        $this->add_keyword_token(TOKEN_USER, 'user:', $content);
        return true;
    }

    /**
     * State for handling subject:foo constructs (emitted as TOKEN_META).
     * Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inmeta($content) {
        $this->add_keyword_token(TOKEN_META, 'subject:', $content);
        return true;
    }

    /**
     * State entered when we've seen a required string (+foo). Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inrequired($content) {
        if (strlen($content) < 2) { // State exit or missing parameter, don't emit.
            return true;
        }
        // Strip off the + sign and add the remainder to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_EXACT, substr($content, 1));
        return true;
    }

    /**
     * State entered when we've seen an excluded string (-foo). Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inexcluded($content) {
        if (strlen($content) < 2) { // State exit or missing parameter.
            return true;
        }
        // Strip off the - sign and add the remainder to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_NEGATE, substr($content, 1));
        return true;
    }

    /**
     * State entered when we've seen a quoted string. Potentially emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function inquotedstring($content) {
        if (strlen($content) < 2) { // State exit or missing parameter.
            return true;
        }
        // Strip off the opening quote and add the remainder to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_STRING, substr($content, 1));
        return true;
    }

    /**
     * State entered when we've seen an ordinary, non-quoted word. Potentially
     * emits a token.
     *
     * @param string $content Text handed over by the lexer.
     * @return bool Always true.
     */
    public function plainstring($content) {
        if (trim($content) === '') { // State exit.
            return true;
        }
        // Add the string to the parsed token array.
        $this->tokens[] = new search_token(TOKEN_STRING, $content);
        return true;
    }

    /**
     * Emits a token for a "keyword:value" construct.
     *
     * Nothing is emitted when $content is no longer than the keyword prefix,
     * which covers both the state-exit call and a keyword with no parameter.
     * Otherwise the first strlen($prefix) bytes are dropped and the trimmed
     * remainder becomes the token value. The content is not checked to
     * actually start with $prefix; only its length is used.
     *
     * @param string $type One of the TOKEN_* constants.
     * @param string $prefix The keyword including its trailing colon, e.g. 'user:'.
     * @param string $content Text handed over by the lexer.
     */
    private function add_keyword_token($type, $prefix, $content) {
        $prefixlength = strlen($prefix);
        if (strlen($content) <= $prefixlength) {
            return;
        }
        $param = trim(substr($content, $prefixlength));
        $this->tokens[] = new search_token($type, $param);
    }
}
/**
 * Deprecated alias of search_generate_SQL().
 *
 * Originally meant to generate a SQL string from a parse tree using TEXT
 * indexes; it now only raises a developer-level debugging notice and hands
 * every argument, unchanged and in the same order, to search_generate_SQL().
 *
 * @deprecated since Moodle 2.9 MDL-48939
 * @todo MDL-48940 This will be deleted in Moodle 3.2
 * @see search_generate_SQL()
 */
function search_generate_text_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                                  $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    debugging('search_generate_text_SQL() is deprecated, please use search_generate_SQL() instead.', DEBUG_DEVELOPER);

    $result = search_generate_SQL(
        $parsetree,
        $datafield,
        $metafield,
        $mainidfield,
        $useridfield,
        $userfirstnamefield,
        $userlastnamefield,
        $timefield,
        $instancefield
    );

    return $result;
}
/**
 * Primitive function to generate a SQL string from a parse tree.
 *
 * Every token contributes one parenthesised condition and the conditions are
 * joined with AND. A token that yields no condition (a TOKEN_META token when
 * $metafield is empty) is skipped, so it never leaves a dangling AND behind.
 *
 * Placeholder names are taken from a counter that persists across calls, so
 * the fragments returned by separate calls do not reuse parameter names.
 *
 * @global object $DB
 * @param array|null $parsetree Tokens produced by a search_lexer/search_parser
 *                              combination; each must offer getType() and getValue().
 * @param string $datafield Field matched by plain, exact and negated terms.
 * @param string $metafield Second field matched by those terms and by TOKEN_META.
 * @param string $mainidfield Field compared for equality with $useridfield in user: clauses.
 * @param string $useridfield Field holding the user id.
 * @param string $userfirstnamefield Field matched by user: terms.
 * @param string $userlastnamefield Field matched by user: terms.
 * @param string $timefield Field compared against datefrom:/dateto: values.
 * @param string $instancefield Field compared against instance: values.
 * @return string|array An empty string when there are no tokens or a token has
 *                      an unknown type; otherwise array($sql, $params).
 */
function search_generate_SQL($parsetree, $datafield, $metafield, $mainidfield, $useridfield,
                             $userfirstnamefield, $userlastnamefield, $timefield, $instancefield) {
    global $DB;
    static $p = 0;

    // Also covers null, which a parser that emitted nothing may hand over;
    // count(null) would be a TypeError on PHP 8.
    if (empty($parsetree)) {
        return "";
    }

    // Loop invariant, so ask the database layer only once.
    $regexsupported = $DB->sql_regex_supported();
    // Only consulted for TOKEN_EXACT, which cannot occur without regex support.
    $REGEXP = $regexsupported ? $DB->sql_regex(true) : '';

    $params = array();
    $clauses = array();

    foreach ($parsetree as $token) {
        $type = $token->getType();
        $value = $token->getValue();

        // Without regex support, transform TOKEN_EXACT searches into STRING
        // searches and trim +- chars.
        if (!$regexsupported) {
            $value = trim($value, '+-');
            if ($type == TOKEN_EXACT) {
                $type = TOKEN_STRING;
            }
        }

        // Two names are reserved per token even when only one ends up used.
        $name1 = 'sq'.$p++;
        $name2 = 'sq'.$p++;

        switch ($type) {
            case TOKEN_STRING:
                $clauses[] = "((".$DB->sql_like($datafield, ":$name1", false).") OR ("
                        .$DB->sql_like($metafield, ":$name2", false)."))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_EXACT:
                // The value is wrapped in [[:<:]] ... [[:>:]] word-boundary markers.
                $clauses[] = "(($datafield $REGEXP :$name1) OR ($metafield $REGEXP :$name2))";
                $params[$name1] = "[[:<:]]".$value."[[:>:]]";
                $params[$name2] = "[[:<:]]".$value."[[:>:]]";
                break;
            case TOKEN_META:
                // No meta field to search: contribute nothing at all.
                if ($metafield != '') {
                    $clauses[] = "(".$DB->sql_like($metafield, ":$name1", false).")";
                    $params[$name1] = "%$value%";
                }
                break;
            case TOKEN_USER:
                $clauses[] = "(($mainidfield = $useridfield) AND (("
                        .$DB->sql_like($userfirstnamefield, ":$name1", false).") OR ("
                        .$DB->sql_like($userlastnamefield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            case TOKEN_USERID:
                $clauses[] = "($useridfield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_INSTANCE:
                $clauses[] = "($instancefield = :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATETO:
                $clauses[] = "($timefield <= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_DATEFROM:
                $clauses[] = "($timefield >= :$name1)";
                $params[$name1] = $value;
                break;
            case TOKEN_NEGATE:
                $clauses[] = "(NOT ((".$DB->sql_like($datafield, ":$name1", false).") OR ("
                        .$DB->sql_like($metafield, ":$name2", false).")))";
                $params[$name1] = "%$value%";
                $params[$name2] = "%$value%";
                break;
            default:
                // Unknown token type: give up on the whole query.
                return '';
        }
    }

    return array(implode(' AND ', $clauses), $params);
}