Moodle release 1.9.15
[moodle.git] / search / indexer.php
blobb63496ad56b1b63a4d23765443b096fd10314706
1 <?php
2 /**
3 * Global Search Engine for Moodle
5 * @package search
6 * @category core
7 * @subpackage search_engine
8 * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
9 * @date 2008/03/31
10 * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
12 * The indexer logic -
14 * Look through each installed module's or block's search document class file (/search/documents)
15 * for necessary search functions, and if they're present add the content to the index.
16 * Repeat this for blocks.
18 * Because the iterator/retrieval functions are now stored in /search/documents/<mod>_document.php,
19 * /mod/mod/lib.php doesn't have to be modified - and thus the search module becomes quite
20 * self-sufficient. URLs are now stored in the index, stopping us from needing to require
21 * the class files to generate a results page.
23 * Along with the index data, each document's summary gets stored in the database
24 * and synchronised to the index (flat file) via the primary key ('id') which is mapped
25 * to the 'dbid' field in the index
26 * */
/// Indexing may run for a long time: lift the execution time limit and push output
/// out as soon as it is produced. Failures of these calls are deliberately silenced.
@set_time_limit(0);
@ob_implicit_flush(true);
@ob_end_flush();

/**
* includes and requires
*/
require_once('../config.php');
require_once("{$CFG->dirroot}/search/lib.php");

/// Prepend the search directory to the include path
/// (presumably so the libraries shipped under /search resolve by relative path - TODO confirm).
ini_set(
    'include_path',
    implode(PATH_SEPARATOR, array($CFG->dirroot.DIRECTORY_SEPARATOR.'search', ini_get('include_path')))
);
/// Access control: indexing needs access to all pages, so it is restricted to
/// logged-in users holding moodle/site:doanything at system level, and only runs
/// when global search is enabled.

require_login();

if (empty($CFG->enableglobalsearch)) {
    error(get_string('globalsearchdisabled', 'search'));
}

if (!has_capability('moodle/site:doanything', get_context_instance(CONTEXT_SYSTEM))) {
    error(get_string('beadmin', 'search'), $CFG->wwwroot.'/login/index.php');
}

/// Confirmation flag guarding against accidental reindexing
/// (indexersplash.php is the correct entry point and supplies areyousure=yes).

$sure = strtolower(optional_param('areyousure', '', PARAM_ALPHA));

if ('yes' !== $sure) {
    mtrace("<pre>Sorry, you need to confirm indexing via <a href='indexersplash.php'>indexersplash.php</a>. (<a href='index.php'>Back to query page</a>).</pre>");

    exit(0);
}
/// The php5 check is noted as living in lib.php; from here on php5-only files are included.
/// (Zend/Search/Lucene.php is no longer required directly from this script.)
require_once("{$CFG->dirroot}/search/indexlib.php");

/// Open the report page and print the server time.
mtrace('<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8" /></head><body>');
mtrace('<pre>Server Time: '.date('r')."\n");

/// A busy flag still set to 1 means the previous run never reached its end.
if (isset($CFG->search_indexer_busy) && '1' == $CFG->search_indexer_busy) {
    mtrace("Warning: Indexing was not successfully completed last time, restarting.\n");
}

/// Raise the busy flag for the duration of this run.
set_config('search_indexer_busy', '1');
/// Paths and the database-side index controller.
$index_path = SEARCH_INDEX_PATH;
$index_db_file = "{$CFG->dirroot}/search/db/{$CFG->dbtype}.sql";
$dbcontrol = new IndexDBControl();

/// Make sure the index directory exists, creating it when it is missing.
if (file_exists($index_path)) {
    mtrace("Using {$index_path} as data directory.");
} else {
    mtrace("Data directory ($index_path) does not exist, attempting to create.");
    if (mkdir($index_path, $CFG->directorypermissions)) {
        mtrace("Directory successfully created.");
    } else {
        search_pexit("Error creating data directory at: $index_path. Please correct.");
    }
}
/// Use the case-insensitive UTF-8 analyzer as the default, then open the index.
/// NOTE(review): the second constructor argument (true) is presumably the "create" flag - confirm.
Zend_Search_Lucene_Analysis_Analyzer::setDefault(
    new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
);
$index = new Zend_Search_Lucene($index_path, true);

/*
OBSOLETE REGENERATION - DB installs with search block by now
if (!$dbcontrol->checkDB()) {
    search_pexit("Database error. Please check settings/files.");
}
*/

/// New regeneration: empty the search documents table before re-filling it.
mtrace('Deleting old index entries.');
delete_records(SEARCH_DATABASE_TABLE);

/// Start the timer.
search_stopwatch();
mtrace("Starting activity modules\n");

/// At the moment a plugin is included in the index solely on the presence of
/// its two search functions:
///   * <mod>_iterator
///   * <mod>_get_content_for_index
$searchables = search_collect_searchables();
/// start indexation
///
/// For every entry of $searchables:
///   1. skip it when $CFG->search_in_<name> exists and is false;
///   2. locate and include its search document file;
///   3. if it provides <name>_iterator() and <name>_get_content_for_index(),
///      store each produced document in the database and in the Lucene index.

if ($searchables){
    foreach ($searchables as $mod) {

        // progress line; sent through mtrace so it ends with a line break like every other message
        mtrace("start {$mod->name}");

        // a setting that is absent leaves the plugin enabled
        $key = 'search_in_'.$mod->name;
        if (isset($CFG->$key) && !$CFG->$key) {
            mtrace("module $key has been administratively disabled. Skipping...\n");
            continue;
        }

        // 'internal' document classes live in /search/documents, others inside the plugin itself
        if ($mod->location == 'internal'){
            $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
        } else {
            $class_file = $CFG->dirroot.'/'.$mod->location.'/'.$mod->name.'/search_document.php';
        }

        // fallback: a PATH_FOR_SEARCH_TYPE_<name> constant may point at the plugin directory
        if (!file_exists($class_file)){
            $pathconstant = "PATH_FOR_SEARCH_TYPE_{$mod->name}";
            if (defined($pathconstant)){
                // constant() replaces the former eval(), whose code string lacked the
                // terminating ';' and therefore never assigned $pluginpath
                $pluginpath = constant($pathconstant);
                $class_file = "{$CFG->dirroot}/{$pluginpath}/searchlib.php";
            } else {
                mtrace ("No search document found for plugin {$mod->name}. Ignoring.");
                continue;
            }
        }

        // the fallback file may be missing as well
        if (!file_exists($class_file)) {
            mtrace ("No search document found for plugin {$mod->name}. Ignoring.");
            continue;
        }
        include_once($class_file);

        //build function names
        $iter_function = $mod->name.'_iterator';
        $index_function = $mod->name.'_get_content_for_index';
        $counter = 0;

        // both functions are required; without them the plugin is passed over silently
        if (!function_exists($index_function) || !function_exists($iter_function)) {
            continue;
        }

        mtrace("Processing module function $index_function ...");
        $sources = $iter_function();
        if ($sources){
            foreach ($sources as $i) {
                $documents = $index_function($i);
                if (!$documents){
                    continue;
                }

                foreach ($documents as $document) {
                    $counter++;

                    //store the document in the db; the returned id ties the db row to the index entry
                    $dbid = $dbcontrol->addDocument($document);

                    //synchronise db with index
                    $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));

                    //add document to index
                    $index->addDocument($document);

                    //commit every 2000 new documents, and print a status message
                    if (($counter % 2000) == 0) {
                        $index->commit();
                        mtrace(".. $counter");
                    }
                }
            }
        }

        //commit left over documents, and finish up
        $index->commit();

        mtrace("-- $counter documents indexed");
        mtrace("done.\n");
    }
}
/// All plugins processed: report, stop the timer and close the report page.
mtrace('Finished activity modules');
search_stopwatch();

mtrace(".<br/><a href='index.php'>Back to query page</a>.");
mtrace('</pre>');

/// Bookkeeping, in this order: lower the busy flag, record when this run
/// finished, and record how many documents the index now reports.
set_config('search_indexer_busy', '0');
set_config('search_indexer_run_date', time());
set_config('search_index_size', intval($index->count()));