MDL-53325 search: Remove commit from engine interface
[moodle.git] / search / classes / manager.php
blob3a5696107782d0e1c0f571201ec611e1fdb4896c
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
17 /**
18 * Search subsystem manager.
20 * @package core_search
21 * @copyright Prateek Sachan {@link http://prateeksachan.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 namespace core_search;
27 defined('MOODLE_INTERNAL') || die;
29 require_once($CFG->dirroot . '/lib/accesslib.php');
31 /**
32 * Search subsystem manager.
34 * @package core_search
35 * @copyright Prateek Sachan {@link http://prateeksachan.com}
36 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
38 class manager {
/**
 * @var int Document type identifier for text contents.
 */
const TYPE_TEXT = 1;

/**
 * @var int Access check result: the user can not access the document.
 */
const ACCESS_DENIED = 0;

/**
 * @var int Access check result: the user can access the document.
 */
const ACCESS_GRANTED = 1;

/**
 * @var int Access check result: the document was deleted.
 */
const ACCESS_DELETED = 2;

/**
 * @var int Maximum number of results that will be retrieved from the search engine.
 */
const MAX_RESULTS = 100;

/**
 * @var int Number of results per page.
 */
const DISPLAY_RESULTS_PER_PAGE = 10;

/**
 * @var \core_search\area\base[] Static cache of the enabled search areas, null until first computed.
 */
protected static $enabledsearchareas = null;

/**
 * @var \core_search\area\base[] Static cache of all system search areas, null until first computed.
 */
protected static $allsearchareas = null;

/**
 * @var \core_search\manager Shared manager instance, null until first requested.
 */
protected static $instance = null;

/**
 * @var \core_search\engine Search engine this manager works with.
 */
protected $engine = null;
/**
 * Constructor, use \core_search\manager::instance instead to get a class instance.
 *
 * @param \core_search\engine $engine The search engine to use
 */
public function __construct($engine) {
    $this->engine = $engine;
}
/**
 * Returns the shared, fully initialised \core_search\manager instance.
 *
 * Global search has to be enabled, use \core_search\manager::is_global_search_enabled
 * to check it before calling this method.
 *
 * @throws \moodle_exception If global search is disabled.
 * @throws \core_search\engine_exception If the engine is missing, not installed or its server is not ready.
 * @return \core_search\manager
 */
public static function instance() {
    global $CFG;

    // Only built once per request, the static cache should be purged during testing.
    if (static::$instance === null) {

        if (!static::is_global_search_enabled()) {
            throw new \moodle_exception('globalsearchdisabled', 'search');
        }

        $searchengine = static::search_engine_instance();
        if (!$searchengine) {
            throw new \core_search\engine_exception('enginenotfound', 'search', '', $CFG->searchengine);
        }

        if (!$searchengine->is_installed()) {
            throw new \core_search\engine_exception('enginenotinstalled', 'search', '', $CFG->searchengine);
        }

        if ($searchengine->is_server_ready() !== true) {
            // No details in the message, any user can hit this exception if the server crashes.
            throw new \core_search\engine_exception('engineserverstatus', 'search');
        }

        static::$instance = new \core_search\manager($searchengine);
    }

    return static::$instance;
}
/**
 * Tells whether the global search feature is switched on.
 *
 * @return bool True when $CFG->enableglobalsearch is set to a non-empty value.
 */
public static function is_global_search_enabled() {
    global $CFG;

    if (empty($CFG->enableglobalsearch)) {
        return false;
    }
    return true;
}
/**
 * Builds a new instance of the search engine selected in $CFG->searchengine.
 *
 * @return \core_search\engine|bool The engine instance, false if the engine class does not exist.
 */
public static function search_engine_instance() {
    global $CFG;

    $engineclass = '\\search_' . $CFG->searchengine . '\\engine';

    return class_exists($engineclass) ? new $engineclass() : false;
}
/**
 * Gives access to the search engine this manager was constructed with.
 *
 * @return \core_search\engine
 */
public function get_engine() {
    $currentengine = $this->engine;
    return $currentengine;
}
/**
 * Converts an area id into the name of the class implementing that search area.
 *
 * The class name has the form \<component name>\search\<area name>.
 *
 * @param string $areaid
 * @return string
 */
protected static function get_area_classname($areaid) {
    $parts = static::extract_areaid_parts($areaid);

    // First part is the component name, second one the area name.
    return '\\' . $parts[0] . '\\search\\' . $parts[1];
}
/**
 * Returns the search area instance identified by $areaid.
 *
 * An instance already stored in one of the static caches is reused, otherwise a
 * new one is created from the area class.
 *
 * @param string $areaid
 * @return \core_search\area\base|bool False if the area is not available.
 */
public static function get_search_area($areaid) {

    // Look into both caches, it does not matter which one it comes from.
    foreach (array(static::$allsearchareas, static::$enabledsearchareas) as $cachedareas) {
        if (!empty($cachedareas[$areaid])) {
            return $cachedareas[$areaid];
        }
    }

    $areaclass = static::get_area_classname($areaid);

    return class_exists($areaclass) ? new $areaclass() : false;
}
/**
 * Return the list of available search areas.
 *
 * Search areas are the classes found in the 'search' namespace of every plugin
 * and every core subsystem. The result is stored in a static cache, one for the
 * complete list and another one for the enabled areas only.
 *
 * @param bool $enabled Return only the enabled ones.
 * @return \core_search\area\base[] Search area instances indexed by area id.
 */
public static function get_search_areas_list($enabled = false) {

    // Two different arrays, we don't expect these arrays to be big.
    if (!$enabled && static::$allsearchareas !== null) {
        return static::$allsearchareas;
    } else if ($enabled && static::$enabledsearchareas !== null) {
        return static::$enabledsearchareas;
    }

    // Components that may provide search areas: all plugins first, core subsystems afterwards.
    $componentnames = array();
    foreach (\core_component::get_plugin_types() as $plugintype => $unused) {
        foreach (\core_component::get_plugin_list($plugintype) as $pluginname => $pluginfullpath) {
            $componentnames[] = $plugintype . '_' . $pluginname;
        }
    }
    foreach (\core_component::get_core_subsystems() as $subsystemname => $subsystempath) {
        $componentnames[] = 'core_' . $subsystemname;
    }

    $searchareas = array();
    foreach ($componentnames as $componentname) {
        $searchclasses = \core_component::get_component_classes_in_namespace($componentname, 'search');
        foreach ($searchclasses as $classname => $classpath) {
            // The area name is the class name without its namespace.
            $areaname = substr(strrchr($classname, '\\'), 1);
            $areaid = static::generate_areaid($componentname, $areaname);
            $searchclass = new $classname();
            if (!$enabled || $searchclass->is_enabled()) {
                $searchareas[$areaid] = $searchclass;
            }
        }
    }

    // Cache results.
    if ($enabled) {
        static::$enabledsearchareas = $searchareas;
    } else {
        static::$allsearchareas = $searchareas;
    }

    return $searchareas;
}
/**
 * Resets the manager instance and both search area lists stored in static vars.
 *
 * @return void
 */
public static function clear_static() {
    static::$instance = null;
    static::$allsearchareas = null;
    static::$enabledsearchareas = null;
}
/**
 * Generates an area id from the componentname and the area name.
 *
 * There should not be any naming conflict as the area name is the
 * class name in component/classes/search/.
 *
 * @param string $componentname
 * @param string $areaname
 * @return string The component name and the area name joined by '-'.
 */
public static function generate_areaid($componentname, $areaname) {
    return $componentname . '-' . $areaname;
}
/**
 * Splits an area id into its string components, using '-' as the separator.
 *
 * @param string $areaid
 * @return array Component name (Frankenstyle) and area name (search area class name)
 */
public static function extract_areaid_parts($areaid) {
    $parts = explode('-', $areaid);
    return $parts;
}
/**
 * Returns the contexts the user can access.
 *
 * The returned value is a multidimensional array because some search engines can group
 * information and there will be a performance benefit on passing only some contexts
 * instead of the whole context array set.
 *
 * @return bool|array Context ids (as both key and value) indexed by area identifier (component + area name).
 *                    Returns true if the user can see everything.
 */
protected function get_areas_user_accesses() {

    // All results for admins. Eventually we could add a new capability for managers.
    if (is_siteadmin()) {
        return true;
    }

    $areasbylevel = array();

    // Split areas by context level so we only iterate only once through courses and cms.
    $searchareas = static::get_search_areas_list(true);
    foreach ($searchareas as $areaid => $unused) {
        $classname = static::get_area_classname($areaid);
        $searcharea = new $classname();
        foreach ($classname::get_levels() as $level) {
            $areasbylevel[$level][$areaid] = $searcharea;
        }
    }

    // This will store area - allowed contexts relations.
    $areascontexts = array();

    if (!empty($areasbylevel[CONTEXT_SYSTEM])) {
        // We add system context to all search areas working at this level. Here each area is fully responsible of
        // the access control as we can not automate much, we can not even check guest access as some areas might
        // want to allow guests to retrieve data from them.

        $systemcontextid = \context_system::instance()->id;
        foreach ($areasbylevel[CONTEXT_SYSTEM] as $areaid => $searchclass) {
            // Keyed by context id, like the course and module contexts added below.
            $areascontexts[$areaid][$systemcontextid] = $systemcontextid;
        }
    }

    // Get the courses where the current user has access, the site course is always included.
    $courses = enrol_get_my_courses(array('id', 'cacherev'));
    $courses[SITEID] = get_course(SITEID);
    foreach ($courses as $course) {

        if (!empty($areasbylevel[CONTEXT_COURSE])) {
            // Add the course contexts the user can view.

            $coursecontext = \context_course::instance($course->id);

            // Course visibility does not depend on the search area, so it is checked once per course.
            if ($course->visible || has_capability('moodle/course:viewhiddencourses', $coursecontext)) {
                foreach ($areasbylevel[CONTEXT_COURSE] as $areaid => $searchclass) {
                    $areascontexts[$areaid][$coursecontext->id] = $coursecontext->id;
                }
            }
        }

        if (!empty($areasbylevel[CONTEXT_MODULE])) {
            // Add the module contexts the user can view (cm_info->uservisible).

            // Info about the course modules, only needed when there are areas working at module level.
            $modinfo = get_fast_modinfo($course);

            foreach ($areasbylevel[CONTEXT_MODULE] as $areaid => $searchclass) {

                // Removing the plugintype 'mod_' prefix.
                $modulename = substr($searchclass->get_component_name(), 4);

                $modinstances = $modinfo->get_instances_of($modulename);
                foreach ($modinstances as $modinstance) {
                    if ($modinstance->uservisible) {
                        $areascontexts[$areaid][$modinstance->context->id] = $modinstance->context->id;
                    }
                }
            }
        }
    }

    return $areascontexts;
}
/**
 * Runs a query against the search engine and returns the matching documents.
 *
 * No security checks are performed here, the calling code is responsible for
 * checking that the current user has the moodle/search:query capability.
 *
 * Results are stored in the 'search_results' cache and reused by later identical queries.
 *
 * @param \stdClass $formdata
 * @return \core_search\document[]
 */
public function search(\stdClass $formdata) {

    $resultscache = \cache::make('core', 'search_results');

    // The key is built from all the query filters. $areascontexts is not part of it,
    // being a user cache it is not needed.
    $cachekey = $this->generate_query_key($formdata);

    // Reuse cached results. Any falsy cached value (an empty result set included) counts
    // as a miss, so those queries are executed again.
    $cached = $resultscache->get($cachekey);
    if ($cached) {
        return $cached;
    }

    // Forget the errors of previous queries.
    $this->engine->clear_query_error();

    // Without accessible contexts there is nothing to ask the engine for.
    $accesses = $this->get_areas_user_accesses();
    $docs = $accesses ? $this->engine->execute_query($formdata, $accesses) : array();

    $resultscache->set($cachekey, $docs);

    return $docs;
}
/**
 * Builds the cache key of a query; generating it ourselves lets MUC know it contains simplekeys.
 *
 * The key is the md5 hash of the query filters followed by the ids of the enabled search areas.
 *
 * @param \stdClass $formdata
 * @return string
 */
protected function generate_query_key($formdata) {

    // Empty filters become empty strings (although q should always have a value). The values
    // are copied into a local array, $formdata itself is left untouched.
    $filtervalues = array();
    foreach (array('q', 'title', 'areaid', 'timestart', 'timeend', 'page') as $field) {
        $filtervalues[$field] = empty($formdata->{$field}) ? '' : $formdata->{$field};
    }

    // Although it is not likely, we prevent cache hits if available search areas change during the session.
    $enabledareas = implode('-', array_keys(static::get_search_areas_list(true)));

    // The query string goes first without a label, all the other filters follow as label=value.
    $rawkey = $filtervalues['q'];
    foreach (array('title', 'areaid', 'timestart', 'timeend', 'page') as $field) {
        $rawkey .= $field . '=' . $filtervalues[$field];
    }

    return md5($rawkey . $enabledareas);
}
/**
 * Asks the search engine to optimize its index (merge separate index segments into one).
 *
 * @return void
 */
public function optimize_index() {
    $searchengine = $this->engine;
    $searchengine->optimize();
}
/**
 * Index all documents.
 *
 * Goes through all the enabled search areas sending to the engine the documents of the
 * records modified since the area's previous indexing run (all of them on a full index).
 * The indexing info of an area is only stored if the engine reports the area as complete.
 *
 * @param bool $fullindex Whether we should reindex everything or not.
 * @throws \moodle_exception If a document has a type other than TYPE_TEXT.
 * @return bool Whether there was any updated document or not.
 */
public function index($fullindex = false) {

    // Unlimited time.
    \core_php_time_limit::raise();

    // Notify the engine that an index starting.
    $this->engine->index_starting($fullindex);

    $sumdocs = 0;

    $searchareas = static::get_search_areas_list(true);
    foreach ($searchareas as $areaid => $searcharea) {

        if (CLI_SCRIPT && !PHPUNIT_TEST) {
            mtrace('Processing ' . $searcharea->get_visible_name() . ' area');
        }

        // Notify the engine that an area is starting.
        $this->engine->area_index_starting($searcharea, $fullindex);

        $indexingstart = time();

        // This is used to store this component config.
        list($componentconfigname, $varname) = $searcharea->get_config_var_name();

        $numrecords = 0;
        $numdocs = 0;
        $numdocsignored = 0;
        $lastindexeddoc = 0;

        // A full index starts from the beginning, otherwise from the start time of the previous run.
        if ($fullindex === true) {
            $prevtimestart = 0;
        } else {
            $prevtimestart = intval(get_config($componentconfigname, $varname . '_indexingstart'));
        }

        // Getting the recordset from the area.
        $recordset = $searcharea->get_recordset_by_timestamp($prevtimestart);

        // Pass get_document as callback.
        $iterator = new \core\dml\recordset_walk($recordset, array($searcharea, 'get_document'));
        foreach ($iterator as $document) {

            // Skip records the area did not convert into a document.
            if (!$document instanceof \core_search\document) {
                continue;
            }

            $docdata = $document->export_for_engine();
            switch ($docdata['type']) {
                case static::TYPE_TEXT:
                    $this->engine->add_document($docdata);
                    $numdocs++;
                    break;
                default:
                    // Unsupported type: close the iterator before aborting the whole indexing run.
                    $numdocsignored++;
                    $iterator->close();
                    throw new \moodle_exception('doctypenotsupported', 'search');
            }

            $lastindexeddoc = $document->get('modified');
            $numrecords++;
        }

        if (CLI_SCRIPT && !PHPUNIT_TEST) {
            if ($numdocs > 0) {
                mtrace('Processed ' . $numrecords . ' records containing ' . $numdocs . ' documents for ' .
                        $searcharea->get_visible_name() . ' area.');
            } else {
                mtrace('No new documents to index for ' . $searcharea->get_visible_name() . ' area.');
            }
        }

        // Notify the engine this area is complete, and only mark times if true.
        if ($this->engine->area_index_complete($searcharea, $numdocs, $fullindex)) {
            $sumdocs += $numdocs;

            // Store last index run once documents have been committed to the search engine.
            set_config($varname . '_indexingstart', $indexingstart, $componentconfigname);
            set_config($varname . '_indexingend', time(), $componentconfigname);
            set_config($varname . '_docsignored', $numdocsignored, $componentconfigname);
            set_config($varname . '_docsprocessed', $numdocs, $componentconfigname);
            set_config($varname . '_recordsprocessed', $numrecords, $componentconfigname);
            if ($lastindexeddoc > 0) {
                set_config($varname . '_lastindexrun', $lastindexeddoc, $componentconfigname);
            }
        }
    }

    // The indexed event is only triggered if at least one document was indexed.
    if ($sumdocs > 0) {
        $event = \core\event\search_indexed::create(
                array('context' => \context_system::instance()));
        $event->trigger();
    }

    $this->engine->index_complete($sumdocs, $fullindex);

    return (bool)$sumdocs;
}
/**
 * Sets to 0 the config values of one search area or of all the enabled ones.
 *
 * The enabled/disabled setting of the areas is the only value that is preserved.
 *
 * @throws \moodle_exception If the requested area is not available.
 * @param string|bool $areaid The area id, empty for all the enabled areas.
 * @return void
 */
public function reset_config($areaid = false) {

    if (empty($areaid)) {
        // Only the enabled ones.
        $searchareas = static::get_search_areas_list(true);
    } else {
        $requestedarea = static::get_search_area($areaid);
        if (!$requestedarea) {
            throw new \moodle_exception('errorareanotavailable', 'search', '', $areaid);
        }
        $searchareas = array($areaid => $requestedarea);
    }

    foreach ($searchareas as $searcharea) {
        list($componentname, $varname) = $searcharea->get_config_var_name();
        $enabledkey = $varname . '_enabled';

        foreach ($searcharea->get_config() as $key => $value) {
            // Everything is reset but the enabled/disabled setting.
            if ($key === $enabledkey) {
                continue;
            }
            set_config($key, 0, $componentname);
        }
    }
}
/**
 * Removes the indexed documents of one area, or of all areas, and resets the related config.
 *
 * @param string $areaid The area id or false for all
 * @return void
 */
public function delete_index($areaid = false) {

    if (empty($areaid)) {
        // No area specified, everything goes.
        $this->engine->delete();
        $this->reset_config();
        return;
    }

    $this->engine->delete($areaid);
    $this->reset_config($areaid);
}
/**
 * Deletes index by id.
 *
 * The id is passed untouched to the search engine's delete_by_id method.
 *
 * @param string $id Id of the document to delete (the previous annotation described it as a Solr document string).
 * @return void
 */
public function delete_index_by_id($id) {
    $this->engine->delete_by_id($id);
}
/**
 * Returns search areas configuration.
 *
 * Disabled areas report 0 for all the values. The last index run is returned as a
 * formatted date, or as the 'never' lang string when it has no value.
 *
 * @param \core_search\area\base[] $searchareas
 * @return \stdClass[] Indexing info of each search area, indexed by area id.
 */
public function get_areas_config($searchareas) {

    $vars = array('indexingstart', 'indexingend', 'lastindexrun', 'docsignored', 'docsprocessed', 'recordsprocessed');

    $configsettings = array();
    foreach ($searchareas as $searcharea) {

        $areaid = $searcharea->get_area_id();

        $configsettings[$areaid] = new \stdClass();
        list($componentname, $varname) = $searcharea->get_config_var_name();

        if (!$searcharea->is_enabled()) {
            // We delete all indexed data on disable so no info.
            foreach ($vars as $var) {
                $configsettings[$areaid]->{$var} = 0;
            }
        } else {
            foreach ($vars as $var) {
                $configsettings[$areaid]->{$var} = get_config($componentname, $varname .'_' . $var);
            }
        }

        // Formatting the time.
        if (!empty($configsettings[$areaid]->lastindexrun)) {
            $configsettings[$areaid]->lastindexrun = userdate($configsettings[$areaid]->lastindexrun);
        } else {
            $configsettings[$areaid]->lastindexrun = get_string('never');
        }
    }

    return $configsettings;
}