2 // This file is part of Moodle - http://moodle.org/
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * Base class for search engines.
 *
 * All search engines must extend this class.
 *
 * @package   core_search
 * @copyright 2015 Daniel Neis
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
namespace core_search;

// Abort immediately unless MOODLE_INTERNAL has been defined before this file is loaded.
defined('MOODLE_INTERNAL') || die();
/**
 * Base class for search engines.
 *
 * All search engines must extend this class.
 *
 * @package   core_search
 * @copyright 2015 Daniel Neis
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
40 abstract class engine
{
/**
 * The search engine configuration.
 *
 * Set by the constructor: either the get_config() result for this plugin or an empty object.
 *
 * @var \stdClass|null
 */
protected $config = null;

/**
 * Last executed query error, if there was any.
 *
 * @var mixed Null when no error is recorded. NOTE(review): presumably a message string - confirm.
 */
protected $queryerror = null;

/**
 * @var array Internal cache of search areas, keyed by area id (false marks an unavailable area).
 */
protected $cachedareas = [];

/**
 * @var array Internal cache of courses, keyed by course id.
 */
protected $cachedcourses = [];

/**
 * User data required to show their fullnames. Indexed by userid.
 *
 * Static, so it is shared by every engine instance.
 *
 * @var array
 */
protected static $cachedusers = [];

/**
 * @var string Frankenstyle plugin name.
 */
protected $pluginname = null;

/**
 * @var bool If true, should skip schema validity check when checking the search engine is ready
 */
protected $skipschemacheck = false;
/**
 * Initialises the search engine configuration.
 *
 * Search engine availability should be checked separately.
 *
 * The plugin name is derived from the namespace of the concrete class, i.e. everything
 * before the first backslash of its fully qualified name.
 *
 * @throws \coding_exception If the concrete class name has no namespace separator, or
 *                           contains no underscore at all.
 */
public function __construct() {
    $classname = get_class($this);
    if (strpos($classname, '\\') === false) {
        throw new \coding_exception('"' . $classname . '" class should specify its component namespace and it should be named engine.');
    } else if (strpos($classname, '_') === false) {
        throw new \coding_exception('"' . $classname . '" class namespace should be its frankenstyle name');
    }

    // This is search_xxxx config.
    $this->pluginname = substr($classname, 0, strpos($classname, '\\'));
    $config = get_config($this->pluginname);
    if ($config) {
        $this->config = $config;
    } else {
        // The leading backslash is required: this file lives in the core_search namespace, so an
        // unqualified "stdClass" would resolve to \core_search\stdClass and fail with a fatal error.
        $this->config = new \stdClass();
    }
}
/**
 * Returns a course instance checking internal caching.
 *
 * The global get_course() is only called when the per-instance cache holds no
 * non-empty entry for the requested id.
 *
 * @param int $courseid
 * @return mixed Whatever the global get_course() returned (used elsewhere in this class as a
 *               record exposing ->fullname).
 */
protected function get_course($courseid) {
    if (empty($this->cachedcourses[$courseid])) {
        // No need to clone, only read.
        $this->cachedcourses[$courseid] = get_course($courseid, false);
    }

    return $this->cachedcourses[$courseid];
}
/**
 * Returns user data checking the internal static cache.
 *
 * Including here the minimum required user information as this may grow big.
 *
 * Only the id and the name fields reported by get_all_user_name_fields() are loaded.
 *
 * @param int $userid
 * @return mixed Whatever $DB->get_record() returned for that user id.
 */
public function get_user($userid) {
    global $DB;

    if (empty(self::$cachedusers[$userid])) {
        $namefields = get_all_user_name_fields(true);
        self::$cachedusers[$userid] = $DB->get_record('user', ['id' => $userid], 'id, ' . $namefields);
    }

    return self::$cachedusers[$userid];
}
/**
 * Clears the users cache.
 *
 * Resets the static cache filled by get_user(), so it affects every engine instance.
 *
 * @return void
 */
public static function clear_users_cache() {
    self::$cachedusers = [];
}
/**
 * Returns a search instance of the specified area checking internal caching.
 *
 * Areas that do not exist or are not enabled are cached as false, so they are only
 * looked up once per engine instance.
 *
 * @param string $areaid Area id
 * @return \core_search\base|bool The search area, or false if it is not available or not enabled
 */
protected function get_search_area($areaid) {
    if (isset($this->cachedareas[$areaid])) {
        // Already resolved: either a usable area instance or false for an unavailable one.
        return $this->cachedareas[$areaid];
    }

    // First result that matches this area.
    $area = \core_search\manager::get_search_area($areaid);
    $this->cachedareas[$areaid] = $area;

    if ($area === false) {
        // The area does not exist or it is not available any more.
        return false;
    }

    if (!$area->is_enabled()) {
        // We skip the area if it is not enabled.
        // Marking it as false so next time we don't need to check it again.
        $this->cachedareas[$areaid] = false;
        return false;
    }

    return $area;
}
/**
 * Returns a document instance prepared to be rendered.
 *
 * Builds the document through the document factory, loads the engine data into it, asks the
 * search area for its URLs and icon, and attaches the course (and, when set, user) full names.
 *
 * @param \core_search\base $searcharea
 * @param array $docdata Must contain at least the 'areaid' and 'itemid' keys.
 * @return \core_search\document
 */
protected function to_document(\core_search\base $searcharea, $docdata) {
    list($component, $area) = \core_search\manager::extract_areaid_parts($docdata['areaid']);

    $document = \core_search\document_factory::instance($docdata['itemid'], $component, $area, $this);
    $document->set_data_from_engine($docdata);
    $document->set_doc_url($searcharea->get_doc_url($document));
    $document->set_context_url($searcharea->get_context_url($document));
    $document->set_doc_icon($searcharea->get_doc_icon($document));

    // Uses the internal caches to get required data needed to render the document later.
    $courserecord = $this->get_course($document->get('courseid'));
    $document->set_extra('coursefullname', $courserecord->fullname);

    if ($document->is_set('userid')) {
        $userrecord = $this->get_user($document->get('userid'));
        $document->set_extra('userfullname', fullname($userrecord));
    }

    return $document;
}
/**
 * Loop through given iterator of search documents
 * and have the search engine back end add them.
 *
 * Items that are not \core_search\document instances are skipped without being counted.
 *
 * @param iterable $iterator the iterator of documents to index
 * @param \core_search\base $searcharea the area for the documents to index
 * @param array $options document indexing options. Keys read here: 'indexfiles',
 *                       'stopat' (optional), 'lastindexedtime' (optional), 'progress' (optional).
 * @return array Processed document counts, in this order: records processed, documents added,
 *               documents ignored, 'modified' value of the last processed document, and whether
 *               the run stopped early because of the time limit.
 */
public function add_documents($iterator, $searcharea, $options) {
    $numrecords = 0;
    $numdocs = 0;
    $numdocsignored = 0;
    $lastindexeddoc = 0;
    $firstindexeddoc = 0;
    $partial = false;
    $progressshownat = manager::get_current_time();

    foreach ($iterator as $document) {
        // Stop if we have exceeded the time limit (and there are still more items). Always
        // do at least one second's worth of documents otherwise it will never make progress.
        if ($lastindexeddoc !== $firstindexeddoc &&
                !empty($options['stopat']) && manager::get_current_time() >= $options['stopat']) {
            $partial = true;
            break;
        }

        if (!$document instanceof \core_search\document) {
            continue;
        }

        if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
            // If we have never indexed this area before, it must be new.
            $document->set_is_new(true);
        }

        if ($options['indexfiles']) {
            // Attach files if we are indexing.
            $searcharea->attach_files($document);
        }

        if ($this->add_document($document, $options['indexfiles'])) {
            $numdocs++;
        } else {
            $numdocsignored++;
        }

        $lastindexeddoc = $document->get('modified');
        if (!$firstindexeddoc) {
            $firstindexeddoc = $lastindexeddoc;
        }
        $numrecords++;

        // If indexing the area takes a long time, periodically output progress information.
        if (isset($options['progress'])) {
            $now = manager::get_current_time();
            if ($now - $progressshownat >= manager::DISPLAY_INDEXING_PROGRESS_EVERY) {
                $progressshownat = $now;
                // The first date format is the same used in cron_trace_time_and_memory().
                $options['progress']->output(date('H:i:s', $now) . ': Done to ' . userdate(
                        $lastindexeddoc, get_string('strftimedatetimeshort', 'langconfig')), 1);
            }
        }
    }

    return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial);
}
/**
 * Returns the plugin name.
 *
 * This is the value the constructor derived from the namespace of the concrete class.
 *
 * @return string Frankenstyle plugin name.
 */
public function get_plugin_name() {
    return $this->pluginname;
}
/**
 * Gets the document class used by this search engine.
 *
 * Search engines can overwrite \core_search\document with \search_ENGINENAME\document class.
 *
 * Looks for a document class in the current search engine namespace, falling back to \core_search\document.
 *
 * Publicly available because search areas do not have access to the engine details,
 * \core_search\document_factory accesses this function.
 *
 * @return string Name of the document class to use.
 */
public function get_document_classname() {
    $engineclass = $this->pluginname . '\\document';

    return class_exists($engineclass) ? $engineclass : '\\core_search\\document';
}
/**
 * Run any pre-indexing operations.
 *
 * Should be overwritten if the search engine needs to do any pre index preparation.
 * This base implementation does nothing.
 *
 * @param bool $fullindex True if a full index will be performed
 * @return void
 */
public function index_starting($fullindex = false) {
    // Nothing by default.
}
/**
 * Run any post indexing operations.
 *
 * Should be overwritten if the search engine needs to do any post index cleanup.
 * This base implementation does nothing.
 *
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index was performed
 * @return void
 */
public function index_complete($numdocs = 0, $fullindex = false) {
    // Nothing by default.
}
/**
 * Do anything that may need to be done before an area is indexed.
 *
 * This base implementation does nothing.
 *
 * @param \core_search\base $searcharea The search area that is about to be indexed
 * @param bool $fullindex True if a full index is being performed
 * @return void
 */
public function area_index_starting($searcharea, $fullindex = false) {
    // Nothing by default.
}
/**
 * Do any area cleanup needed, and do anything to confirm contents.
 *
 * Return false to prevent the search area completed time and stats from being updated.
 *
 * @param \core_search\base $searcharea The search area that was complete
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool True means that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // Default: no cleanup to do, the data is considered indexed.
    return true;
}
/**
 * Optimizes the search engine.
 *
 * Should be overwritten if the search engine can optimize its contents.
 * This base implementation does nothing.
 *
 * @return void
 */
public function optimize() {
    // Nothing by default.
}
/**
 * Does the system satisfy all the requirements.
 *
 * Should be overwritten if the search engine has any system dependencies
 * that needs to be checked.
 *
 * @return bool
 */
public function is_installed() {
    // Default: the base class has no system dependencies to check.
    return true;
}
/**
 * Returns any error reported by the search engine when executing the provided query.
 *
 * It should be called from static::execute_query when an exception is triggered.
 *
 * @return mixed The stored error; null when none is recorded.
 *               NOTE(review): presumably a message string set by the concrete engine - confirm.
 */
public function get_query_error() {
    return $this->queryerror;
}
/**
 * Returns the total number of documents available for the most recent call to execute_query.
 *
 * This can be an estimate, but should get more accurate the higher the limit passed to execute_query is.
 * To do that, the engine can use (actual result returned count + count of unchecked documents), or
 * (total possible docs - docs that have been checked and rejected).
 *
 * Engine can limit to manager::MAX_RESULTS if there is cost to determining more.
 * If this cannot be computed in a reasonable way, manager::MAX_RESULTS may be returned.
 *
 * @return int
 */
abstract public function get_query_total_count();
/**
 * Return true if file indexing is supported and enabled. False otherwise.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    // Default: engines have to opt in to file indexing by overriding this method.
    return false;
}
/**
 * Clears the current query error value.
 *
 * After this call get_query_error() returns null.
 *
 * @return void
 */
public function clear_query_error() {
    $this->queryerror = null;
}
/**
 * Is the server ready to use?
 *
 * This should also check that the search engine configuration is ok.
 *
 * If the function $this->should_skip_schema_check() returns true, then this function may leave
 * out time-consuming checks that the schema is valid. (This allows for improved performance on
 * critical pages such as the main search form.)
 *
 * @return true|string Returns true if all good or an error string.
 */
abstract public function is_server_ready();
/**
 * Tells the search engine to skip any time-consuming checks that it might do as part of the
 * is_server_ready function, and only carry out a basic check that it can contact the server.
 *
 * This setting is not remembered and applies only to the current request.
 *
 * @param bool $skip True to skip the checks, false to start checking again
 * @return void
 */
public function skip_schema_check($skip = true) {
    $this->skipschemacheck = $skip;
}
/**
 * For use by subclasses. The engine can call this inside is_server_ready to check whether it
 * should skip time-consuming schema checks.
 *
 * Returns the flag last stored by skip_schema_check() (false until that is called).
 *
 * @return bool True if schema checks should be skipped
 */
protected function should_skip_schema_check() {
    return $this->skipschemacheck;
}
/**
 * Adds a document to the search engine.
 *
 * @param document $document
 * @param bool $fileindexing True if file indexing is to be used
 * @return bool False if the file was skipped or failed, true on success
 */
abstract public function add_document($document, $fileindexing = false);
/**
 * Executes the query on the engine.
 *
 * Implementations of this function should check user context array to limit the results to contexts where the
 * user has access. They should also limit the owneruserid field to manager::NO_OWNER_ID or the current user's id.
 * Engines must use area->check_access() to confirm user access.
 *
 * Engines should reasonably attempt to fill up to limit with valid results if they are available.
 *
 * The $filters object may include the following fields (optional except q):
 * - q: value of main search field; results should include this text
 * - title: if included, title must match this search
 * - areaids: array of search area id strings (only these areas will be searched)
 * - courseids: array of course ids (only these courses will be searched)
 * - groupids: array of group ids (only results specifically from these groupids will be
 *   searched) - this option will be ignored if the search engine doesn't support groups
 *
 * The $accessinfo parameter has two different values (for historical compatibility). If the
 * engine returns false to supports_group_filtering then it is an array of user contexts, or
 * true if the user can access all contexts. (This parameter used to be called $usercontexts.)
 * If the engine returns true to supports_group_filtering then it will be an object containing
 * these fields:
 * - everything (true if admin is searching with no restrictions)
 * - usercontexts (same as above)
 * - separategroupscontexts (array of context ids where separate groups are used)
 * - visiblegroupscontextsareas (array of subset of those where some areas use visible groups)
 * - usergroups (array of relevant group ids that user belongs to)
 *
 * The engine should apply group restrictions to those contexts listed in the
 * 'separategroupscontexts' array. In these contexts, it should only include results if the
 * groupid is not set, or if the groupid matches one of the values in the 'usergroups' array, or
 * if the search area is one of those listed in 'visiblegroupscontextsareas' for that context.
 *
 * @param \stdClass $filters Query and filters to apply.
 * @param \stdClass|array|bool $accessinfo Information about the contexts the user can access
 * @param int $limit The maximum number of results to return. If empty, limit to manager::MAX_RESULTS.
 * @return \core_search\document[]|bool Results or false if no results
 */
abstract public function execute_query($filters, $accessinfo, $limit = 0);
/**
 * Delete all documents.
 *
 * @param string $areaid To filter by area
 * @return void
 */
abstract public function delete($areaid = null);
/**
 * Deletes information related to a specific context id. This should be used when the context
 * itself is deleted from Moodle.
 *
 * This only deletes information for the specified context - not for any child contexts.
 *
 * This function is optional; if not supported it will return false and the information will
 * not be deleted from the search index.
 *
 * If an engine implements this function it should also implement delete_index_for_course;
 * otherwise, nothing will be deleted when users delete an entire course at once.
 *
 * @param int $oldcontextid ID of context that has been deleted
 * @return bool True if implemented
 * @throws \core_search\engine_exception Engines may throw this exception for any problem
 */
public function delete_index_for_context(int $oldcontextid) {
    // Default: not implemented, nothing is deleted.
    return false;
}
/**
 * Deletes information related to a specific course id. This should be used when the course
 * itself is deleted from Moodle.
 *
 * This deletes all information relating to that course from the index, including all child
 * contexts.
 *
 * This function is optional; if not supported it will return false and the information will
 * not be deleted from the search index.
 *
 * If an engine implements this function then, ideally, it should also implement
 * delete_index_for_context so that deletion of single activities/blocks also works.
 *
 * @param int $oldcourseid ID of course that has been deleted
 * @return bool True if implemented
 * @throws \core_search\engine_exception Engines may throw this exception for any problem
 */
public function delete_index_for_course(int $oldcourseid) {
    // Default: not implemented, nothing is deleted.
    return false;
}
/**
 * Checks that the schema is the latest version. If the version stored in config does not match
 * the current, this function will attempt to upgrade the schema.
 *
 * A missing or empty 'schemaversion' config value is treated as version 0.
 *
 * @return bool|string True if schema is OK, a string if user needs to take action
 */
public function check_latest_schema() {
    $currentversion = empty($this->config->schemaversion) ? 0 : $this->config->schemaversion;

    if ($currentversion < document::SCHEMA_VERSION) {
        // Stored version is behind the code: delegate to the engine's upgrade hook.
        return $this->update_schema((int)$currentversion, (int)document::SCHEMA_VERSION);
    }

    return true;
}
/**
 * Usually called by the engine; marks that the schema has been updated.
 *
 * Stores the value as the 'schemaversion' config setting of this plugin.
 *
 * @param int $version Records the schema version now applied
 * @return void
 */
public function record_applied_schema_version($version) {
    set_config('schemaversion', $version, $this->pluginname);
}
/**
 * Requests the search engine to upgrade the schema. The engine should update the schema if
 * possible/necessary, and should ensure that record_applied_schema_version is called once the
 * new version has been applied.
 *
 * If it is not possible to upgrade the schema at the moment, it can do nothing and return; the
 * function will be called again next time search is initialised.
 *
 * The default implementation just returns, with a DEBUG_DEVELOPER warning.
 *
 * @param int $oldversion Old schema version
 * @param int $newversion New schema version
 * @return bool|string True if schema is updated successfully, a string if it needs updating manually
 */
protected function update_schema($oldversion, $newversion) {
    $warning = 'Unable to update search engine schema: ' . $this->pluginname;
    debugging($warning, DEBUG_DEVELOPER);

    return get_string('schemanotupdated', 'search');
}
/**
 * Checks if this search engine supports groups.
 *
 * Note that returning true to this function causes the parameters to execute_query to be
 * passed differently!
 *
 * In order to implement groups and return true to this function, the search engine should:
 *
 * 1. Handle the fields ->separategroupscontexts and ->usergroups in the $accessinfo parameter
 *    to execute_query (ideally, using these to automatically restrict search results).
 * 2. Support the optional groupids parameter in the $filter parameter for execute_query to
 *    restrict results to only those where the stored groupid matches the given value.
 *
 * @return bool True if this engine supports searching by group id field
 */
public function supports_group_filtering() {
    // Default: engines have to opt in to group support by overriding this method.
    return false;
}
/**
 * Obtain a list of results orders (and names for them) that are supported by this
 * search engine in the given context.
 *
 * By default, engines sort by relevance only.
 *
 * @param \context $context Context that the user requested search from
 * @return array Array from order name => display text
 */
public function get_supported_orders(\context $context) {
    $orders = [];
    $orders['relevance'] = get_string('order_relevance', 'search');

    return $orders;
}
645 * Checks if the search engine supports searching by user.
647 * If it returns true to this function, the search engine should support the 'userids' option
648 * in the $filters value passed to execute_query(), returning only items where the userid in
649 * the search document matches one of those user ids.
651 * @return bool True if the search engine supports searching by user
653 public function supports_users() {