Merge branch 'MDL-78408-402' of https://github.com/snake/moodle into MOODLE_402_STABLE
[moodle.git] / search / classes / base.php
blobf73238e569f18188fb0d0bc9f6788a35a962e8e7
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * Search base class to be extended by search areas.
20 * @package core_search
21 * @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 namespace core_search;
27 defined('MOODLE_INTERNAL') || die();
29 /**
30 * Base search implementation.
32 * Components and plugins interested in filling the search engine with data should extend this class (or any extension of this
33 * class).
35 * @package core_search
36 * @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
37 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
39 abstract class base {
/**
 * Name of the search area; the constructor takes it from the last segment of the class name.
 *
 * @var string
 */
protected $areaname = null;

/**
 * Frankenstyle name of the owning component; the constructor takes it from the first namespace segment.
 *
 * @var string
 */
protected $componentname = null;

/**
 * Type of the owning component: 'core' or the plugin type.
 *
 * @var string
 */
protected $componenttype = null;

/**
 * Context levels this search implementation works on.
 *
 * @var array
 */
protected static $levels = [CONTEXT_SYSTEM];

/**
 * Identifier of the area, generated from the component name and the area name.
 *
 * @var string
 */
public $areaid;
/**
 * Constructor.
 *
 * Derives the area name, component name, area id and component type from the
 * fully qualified name of the concrete class.
 *
 * @throws \coding_exception If the class name does not contain '\search' or does not contain an underscore.
 */
public final function __construct() {

    $fqcn = get_class($this);

    // Reject classes that do not follow the expected naming scheme.
    if (strpos($fqcn, '\search') === false) {
        throw new \coding_exception('Search area classes should be located in \PLUGINTYPE_PLUGINNAME\search\AREANAME.');
    }
    if (strpos($fqcn, '_') === false) {
        throw new \coding_exception($fqcn . ' class namespace level 1 should be its component frankenstyle name');
    }

    // The area is whatever follows the last backslash; the component is whatever precedes the first one.
    $lastsegment = strrchr($fqcn, '\\');
    $this->areaname = substr($lastsegment, 1);

    $firstseparator = strpos($fqcn, '\\');
    $this->componentname = substr($fqcn, 0, $firstseparator);

    $this->areaid = \core_search\manager::generate_areaid($this->componentname, $this->areaname);

    // The component type is the part of the frankenstyle name before the first underscore.
    $firstunderscore = strpos($this->componentname, '_');
    $this->componenttype = substr($this->componentname, 0, $firstunderscore);
}
/**
 * Returns the context levels this search implementation works on.
 *
 * Uses late static binding, so a subclass that redeclares $levels gets its own value.
 *
 * @return array Context level constants
 */
public static function get_levels() {
    return static::$levels;
}
/**
 * Returns the id of this search area.
 *
 * @return string
 */
public function get_area_id() {
    return $this->areaid;
}
/**
 * Returns the Moodle component name.
 *
 * This is either the full frankenstyle plugin name or the core subsystem name.
 *
 * @return string
 */
public function get_component_name() {
    return $this->componentname;
}
/**
 * Returns the component type.
 *
 * This is a plugin type, or 'core' for core subsystems.
 *
 * @return string
 */
public function get_component_type() {
    return $this->componenttype;
}
/**
 * Returns the human-readable name of the area.
 *
 * The name is the language string 'search:AREANAME', looked up in the owning
 * component, or in 'search' when the area belongs to a core subsystem.
 *
 * @param bool $lazyload Usually false, unless when in admin settings.
 * @return string
 */
public function get_visible_name($lazyload = false) {

    // Core subsystem strings go to lang/XX/search.php.
    $stringcomponent = ($this->componenttype === 'core') ? 'search' : $this->componentname;

    return get_string('search:' . $this->areaname, $stringcomponent, null, $lazyload);
}
/**
 * Returns where this area's configuration is stored.
 *
 * The location depends on whether the area belongs to a Moodle subsystem or to a plugin,
 * as plugin-related config should remain in the plugin's own scope.
 *
 * @access private
 * @return array Two elements: the config component, then the setting name prefix for this area
 */
public function get_config_var_name() {

    if ($this->componenttype !== 'core') {
        // Plugins config in the plugin scope.
        return [$this->componentname, 'search_' . $this->areaname];
    }

    // Core subsystems config in core_search and setting name using only [a-zA-Z0-9_]+.
    $idparts = \core_search\manager::extract_areaid_parts($this->areaid);
    $settingprefix = $idparts[0] . '_' . $idparts[1];

    return ['core_search', $settingprefix];
}
/**
 * Returns all the configuration values of this search area.
 *
 * One entry is read per name returned by get_settingnames(); each key is the
 * area's setting prefix followed by that name. A missing '_enabled' value
 * (get_config() returned false) is reported as 1.
 *
 * @return array Setting values keyed by full setting name
 */
public function get_config() {
    [$component, $prefix] = $this->get_config_var_name();

    $config = [];
    foreach (self::get_settingnames() as $suffix) {
        $settingkey = $prefix . $suffix;
        $config[$settingkey] = get_config($component, $settingkey);
    }

    // Search areas are enabled by default.
    $enabledkey = $prefix . '_enabled';
    if ($config[$enabledkey] === false) {
        $config[$enabledkey] = 1;
    }

    return $config;
}
/**
 * Returns the suffixes of every setting a search area stores.
 *
 * @return array List of setting name suffixes, each starting with an underscore
 */
public static function get_settingnames() {
    return [
        '_enabled',
        '_indexingstart',
        '_indexingend',
        '_lastindexrun',
        '_docsignored',
        '_docsprocessed',
        '_recordsprocessed',
        '_partial',
    ];
}
/**
 * Is the search area enabled by the system administrator?
 *
 * @return bool True when enabled, or when no '_enabled' value has been stored yet
 */
public function is_enabled() {
    [$component, $prefix] = $this->get_config_var_name();

    $stored = get_config($component, $prefix . '_enabled');

    // Search areas are enabled by default.
    return ($stored === false) ? true : (bool)$stored;
}
/**
 * Stores whether this search area is enabled.
 *
 * The value is written to the area's '_enabled' setting in the config scope
 * given by get_config_var_name().
 *
 * @param mixed $isenabled Value to store; is_enabled() later casts the stored value to bool
 * @return mixed Whatever set_config() returns (presumably a success flag - confirm against set_config())
 */
public function set_enabled($isenabled) {
    [$component, $prefix] = $this->get_config_var_name();

    $settingname = $prefix . '_enabled';
    return set_config($settingname, $isenabled, $component);
}
/**
 * Gets the length of time spent indexing this area the last time it was indexed.
 *
 * The duration is the difference between the stored '_indexingend' and
 * '_indexingstart' values.
 *
 * @return int|bool Time in seconds spent indexing this area last time, false if either value is missing or empty
 */
public function get_last_indexing_duration() {
    [$component, $prefix] = $this->get_config_var_name();

    $startedat = get_config($component, $prefix . '_indexingstart');
    $finishedat = get_config($component, $prefix . '_indexingend');

    if (!$startedat || !$finishedat) {
        return false;
    }

    return $finishedat - $startedat;
}
/**
 * Tells whether this area uses file indexing.
 *
 * The default implementation always answers no.
 *
 * @return bool
 */
public function uses_file_indexing() {
    return false;
}
/**
 * Returns a recordset ordered by modification date ASC.
 *
 * Each record can include any data self::get_document might need but it must:
 * - Include an 'id' field: unique identifier (in this area's scope) of a document to index in the search engine.
 *   If the indexed content field can contain embedded files, the 'id' value should match the filearea itemid.
 * - Only return data modified since $modifiedfrom, including $modifiedfrom itself, to prevent
 *   some records from not being indexed (e.g. your-timemodified-fieldname >= $modifiedfrom).
 * - Order the returned data by time modified in ascending order, as \core_search\manager will need to store
 *   the modified time of the last indexed document.
 *
 * Since Moodle 3.4, subclasses should instead implement get_document_recordset, which has
 * an additional context parameter. This function continues to work for implementations which
 * haven't been updated, or where the context parameter is not required.
 *
 * This default implementation delegates to get_document_recordset() without a context.
 *
 * @param int $modifiedfrom
 * @return \moodle_recordset
 * @throws \coding_exception If get_document_recordset() returns false, i.e. neither method is implemented
 */
public function get_recordset_by_timestamp($modifiedfrom = 0) {
    $recordset = $this->get_document_recordset($modifiedfrom);

    if ($recordset !== false) {
        return $recordset;
    }

    throw new \coding_exception('Search area must implement get_document_recordset or get_recordset_by_timestamp');
}
/**
 * Returns a recordset containing all items from this area, optionally within the given context,
 * and including only items modified from (>=) the specified time. The recordset must be ordered
 * in ascending order of modified time.
 *
 * Each record can include any data self::get_document might need. It must include an 'id'
 * field, a unique identifier (in this area's scope) of a document to index in the search engine.
 * If the indexed content field can contain embedded files, the 'id' value should match the
 * filearea itemid.
 *
 * The return value can be a recordset, null (if this area does not provide any results in the
 * given context and there is no need to do a database query to find out), or false (if this
 * facility is not currently supported by this search area).
 *
 * If this function returns false, then:
 * - If indexing the entire system (no context restriction) the search indexer will try
 *   get_recordset_by_timestamp instead
 * - If trying to index a context (e.g. when restoring a course), the search indexer will not
 *   index this area, so that restored content may not be indexed.
 *
 * The default implementation returns false, indicating that this facility is not supported and
 * the older get_recordset_by_timestamp function should be used.
 *
 * This function must accept all possible values for the $context parameter. For example, if
 * you are implementing this function for the forum module, it should still operate correctly
 * if called with the context for a glossary module, or for the HTML block. (In these cases
 * where it will not return any data, it may return null.)
 *
 * The $context parameter can also be null or the system context; both of these indicate that
 * all data, without context restriction, should be returned.
 *
 * @param int $modifiedfrom Return only records modified at or after this time
 * @param \context|null $context Context (null means no context restriction)
 * @return \moodle_recordset|null|false Recordset / null if no results / false if not supported
 * @since Moodle 3.4
 */
public function get_document_recordset($modifiedfrom = 0, ?\context $context = null) {
    // The nullable type is spelled out: relying on "= null" to make the type nullable is deprecated in PHP 8.4.
    return false;
}
/**
 * Checks if get_document_recordset is supported for this search area.
 *
 * For many uses you can simply call get_document_recordset and see if it returns false, but
 * this function is useful when you don't want to actually call the function right away.
 *
 * @return bool True if a subclass declares its own get_document_recordset
 */
public function supports_get_document_recordset() {
    // The method counts as supported when it is declared by some class other than this base class,
    // i.e. when the default implementation has been overridden.
    $declaringclass = (new \ReflectionMethod($this, 'get_document_recordset'))->getDeclaringClass();

    return $declaringclass->getName() !== self::class;
}
/**
 * Returns the document related with the provided record.
 *
 * This method receives a record with the document id and any other info returned by
 * get_recordset_by_timestamp or get_document_recordset that might be useful here. The idea is to
 * keep database queries to a minimum, as this function will be called for each document to index.
 * As an alternative, use cached data.
 *
 * Internally it should use \core_search\document to standardise the documents before sending them
 * to the search engine.
 *
 * Search areas should send plain text to the search engine; use the following function to convert
 * any user input data to plain text: {@link content_to_text}
 *
 * Valid keys for the options array are:
 *     indexfiles => File indexing is enabled if true.
 *     lastindexedtime => The last time this area was indexed. 0 if never indexed.
 *
 * The lastindexedtime value is not set if indexing a specific context rather than the whole
 * system.
 *
 * @param \stdClass $record A record containing, at least, the indexed document id and a modified timestamp
 * @param array $options Options for document creation
 * @return \core_search\document
 */
abstract public function get_document($record, $options = array());
/**
 * Returns the document title to display.
 *
 * Subclasses can override this to customise the title string; the default is the
 * document's 'title' field.
 *
 * @param \core_search\document $doc
 * @return string Document title to display in the search results page
 */
public function get_document_display_title(\core_search\document $doc) {
    $title = $doc->get('title');

    return $title;
}
/**
 * Returns the file areas whose files should be indexed for this search area.
 *
 * Should be overridden by each search area that indexes files; attach_files()
 * passes each returned entry to the file storage as a file area name. The default
 * is an empty list.
 *
 * @return array
 */
public function get_search_fileareas() {
    return array();
}
/**
 * Attaches the files related to the current document to the document object,
 * ready for indexing by Global Search.
 *
 * The default implementation retrieves all files for the file areas returned by
 * get_search_fileareas(), using the document's 'contextid' and 'itemid' fields and
 * this area's component name. If you need to filter files to specific items per
 * file area, you will need to override this method and explicitly provide the items.
 *
 * @param document $document The current document
 * @return void
 */
public function attach_files($document) {
    $fileareas = $this->get_search_fileareas();
    $contextid = $document->get('contextid');
    $component = $this->get_component_name();
    $itemid = $document->get('itemid');

    // The file storage accessor does not depend on the file area, so fetch it once rather than per iteration.
    $fs = get_file_storage();

    foreach ($fileareas as $filearea) {
        // Empty sort order; the final false presumably excludes directory entries - confirm against get_area_files().
        $files = $fs->get_area_files($contextid, $component, $filearea, $itemid, '', false);

        foreach ($files as $file) {
            $document->add_stored_file($file);
        }
    }
}
/**
 * Tells whether the current user can see the document.
 *
 * @param int $id The internal search area entity id.
 * @return int One of the manager::ACCESS_xx constants
 */
abstract public function check_access($id);
/**
 * Returns a URL to the document; it might be the same as self::get_context_url().
 *
 * @param \core_search\document $doc
 * @return \moodle_url
 */
abstract public function get_doc_url(\core_search\document $doc);
/**
 * Returns a URL to the context the document belongs to.
 *
 * @param \core_search\document $doc
 * @return \moodle_url
 */
abstract public function get_context_url(\core_search\document $doc);
/**
 * Helper function that gets SQL useful for restricting a search query given a passed-in
 * context, for data stored at course level.
 *
 * The SQL returned will be zero or more JOIN statements, surrounded by whitespace, which
 * restrict the query to courses inside the context: a category context joins against
 * {course_categories} (the category itself or any sub-category), a course context joins
 * against {course}.
 *
 * You can pass in a null or system context, which will both return an empty string and no
 * params.
 *
 * Returns an array with two nulls if there can be no results for a course within this context
 * (block, module and user contexts).
 *
 * If named parameters are used, these will be named gclcrs0, gclcrs1, etc. The table aliases
 * used in SQL also all begin with gclcrs, to avoid conflicts.
 *
 * @param \context|null $context Context to restrict the query
 * @param string $coursetable Name of alias for course table e.g. 'c'
 * @param int $paramtype Type of SQL parameters to use (default question mark)
 * @return array Array with SQL and parameters; both null if no need to query
 * @throws \coding_exception If called with invalid params
 */
protected function get_course_level_context_restriction_sql(?\context $context,
        $coursetable, $paramtype = SQL_PARAMS_QM) {
    global $DB;

    // No context means no restriction at all.
    if (!$context) {
        return ['', []];
    }

    // Pick the two placeholders and the keys under which their values are stored.
    switch ($paramtype) {
        case SQL_PARAMS_QM:
            [$placeholder1, $placeholder2, $paramkey1, $paramkey2] = ['?', '?', 0, 1];
            break;
        case SQL_PARAMS_NAMED:
            [$placeholder1, $placeholder2, $paramkey1, $paramkey2] =
                    [':gclcrs0', ':gclcrs1', 'gclcrs0', 'gclcrs1'];
            break;
        default:
            throw new \coding_exception('Unexpected $paramtype: ' . $paramtype);
    }

    switch ($context->contextlevel) {
        case CONTEXT_SYSTEM:
            // Whole system: nothing to restrict.
            return ['', []];

        case CONTEXT_COURSECAT:
            // Find all courses within the specified category or any sub-category.
            $pathmatch = $DB->sql_like('gclcrscc2.path',
                    $DB->sql_concat('gclcrscc1.path', $placeholder2));
            $joins = " JOIN {course_categories} gclcrscc1 ON gclcrscc1.id = {$placeholder1}
                     JOIN {course_categories} gclcrscc2 ON gclcrscc2.id = {$coursetable}.category
                          AND (gclcrscc2.id = gclcrscc1.id OR {$pathmatch}) ";
            // Note: The second param is a bit annoying as it obviously never changes, but sql_like
            // throws a debug warning if you pass it anything with quotes in, so it has to be
            // a bound parameter.
            return [$joins, [$paramkey1 => $context->instanceid, $paramkey2 => '/%']];

        case CONTEXT_COURSE:
            // We just join again against the same course entry and confirm that it has the
            // same id as the context.
            $joins = " JOIN {course} gclcrsc ON gclcrsc.id = {$coursetable}.id
                          AND gclcrsc.id = {$placeholder1}";
            return [$joins, [$paramkey1 => $context->instanceid]];

        case CONTEXT_BLOCK:
        case CONTEXT_MODULE:
        case CONTEXT_USER:
            // Context cannot contain any courses.
            return [null, null];

        default:
            throw new \coding_exception('Unexpected contextlevel: ' . $context->contextlevel);
    }
}
/**
 * Gets a list of all contexts to reindex when reindexing this search area. The list should be
 * returned in an order that is likely to be suitable when reindexing, for example with newer
 * contexts first.
 *
 * The default implementation simply returns the system context, which will result in
 * reindexing everything in normal date order (oldest first).
 *
 * @return \Iterator Iterator of contexts to reindex
 */
public function get_contexts_to_reindex() {
    $contexts = [\context_system::instance()];

    return new \ArrayIterator($contexts);
}
/**
 * Returns an icon instance for the document.
 *
 * The default implementation ignores the document and always uses the 'i/empty' icon.
 *
 * @param \core_search\document $doc
 * @return \core_search\document_icon
 */
public function get_doc_icon(document $doc) : document_icon {
    $icon = new document_icon('i/empty');

    return $icon;
}
/**
 * Returns a list of category names associated with the area.
 *
 * The default implementation returns only manager::SEARCH_AREA_CATEGORY_OTHER.
 *
 * @return array
 */
public function get_category_names() {
    $categories = [manager::SEARCH_AREA_CATEGORY_OTHER];

    return $categories;
}