Merge changes made in revisions #r9405 to #r9467
[phpbb.git] / phpBB / includes / search / fulltext_mysql.php
blob8c4b8f3c7d4b1e4b03a02d5f9edd98a8418c050d
1 <?php
2 /**
4 * @package search
5 * @version $Id$
6 * @copyright (c) 2005 phpBB Group
7 * @license http://opensource.org/licenses/gpl-license.php GNU Public License
9 */
11 /**
12 * @ignore
14 if (!defined('IN_PHPBB'))
16 exit;
19 /**
20 * @ignore
22 include_once(PHPBB_ROOT_PATH . 'includes/search/search.' . PHP_EXT);
24 /**
25 * fulltext_mysql
26 * Fulltext search for MySQL
27 * @package search
29 class fulltext_mysql extends search_backend
31 private $stats = array();
32 public $word_length = array();
33 private $split_words = array();
34 public $search_query;
35 public $common_words = array();
37 function __construct(&$error)
39 $this->word_length = array('min' => phpbb::$config['fulltext_mysql_min_word_len'], 'max' => phpbb::$config['fulltext_mysql_max_word_len']);
41 $error = false;
44 /**
45 * Checks for correct MySQL version and stores min/max word length in the config
47 public function init()
49 $result = phpbb::$db->sql_query('SHOW TABLE STATUS LIKE \'' . POSTS_TABLE . '\'');
50 $info = phpbb::$db->sql_fetchrow($result);
51 phpbb::$db->sql_freeresult($result);
53 $engine = '';
54 if (isset($info['Engine']))
56 $engine = $info['Engine'];
58 else if (isset($info['Type']))
60 $engine = $info['Type'];
63 if ($engine != 'MyISAM')
65 return phpbb::$user->lang['FULLTEXT_MYSQL_NOT_MYISAM'];
68 $sql = 'SHOW VARIABLES
69 LIKE \'ft\_%\'';
70 $result = phpbb::$db->sql_query($sql);
72 $mysql_info = array();
73 while ($row = phpbb::$db->sql_fetchrow($result))
75 $mysql_info[$row['Variable_name']] = $row['Value'];
77 phpbb::$db->sql_freeresult($result);
79 set_config('fulltext_mysql_max_word_len', $mysql_info['ft_max_word_len']);
80 set_config('fulltext_mysql_min_word_len', $mysql_info['ft_min_word_len']);
82 return false;
85 /**
86 * Splits keywords entered by a user into an array of words stored in $this->split_words
87 * Stores the tidied search query in $this->search_query
89 * @param string &$keywords Contains the keyword as entered by the user
90 * @param string $terms is either 'all' or 'any'
91 * @return bool false if no valid keywords were found and otherwise true
93 public function split_keywords(&$keywords, $terms)
95 if ($terms == 'all')
97 $match = array('#\sand\s#iu', '#\sor\s#iu', '#\snot\s#iu', '#\+#', '#-#', '#\|#');
98 $replace = array(' +', ' |', ' -', ' +', ' -', ' |');
100 $keywords = preg_replace($match, $replace, $keywords);
103 // Filter out as above
104 $split_keywords = preg_replace("#[\n\r\t]+#", ' ', trim(htmlspecialchars_decode($keywords)));
106 // Split words
107 $split_keywords = preg_replace('#([^\p{L}\p{N}\'*"()])#u', '$1$1', str_replace('\'\'', '\' \'', trim($split_keywords)));
109 $matches = array();
110 preg_match_all('#(?:[^\p{L}\p{N}*"()]|^)([+\-|]?(?:[\p{L}\p{N}*"()]+\'?)*[\p{L}\p{N}*"()])(?:[^\p{L}\p{N}*"()]|$)#u', $split_keywords, $matches);
111 $this->split_words = $matches[1];
113 // We limit the number of allowed keywords to minimize load on the database
114 if (phpbb::$config['max_num_search_keywords'] && sizeof($this->split_words) > phpbb::$config['max_num_search_keywords'])
116 trigger_error(phpbb::$user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', phpbb::$config['max_num_search_keywords'], sizeof($this->split_words)));
119 // to allow phrase search, we need to concatenate quoted words
120 $tmp_split_words = array();
121 $phrase = '';
122 foreach ($this->split_words as $word)
124 if ($phrase)
126 $phrase .= ' ' . $word;
127 if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
129 $tmp_split_words[] = $phrase;
130 $phrase = '';
133 else if (strpos($word, '"') !== false && substr_count($word, '"') % 2 == 1)
135 $phrase = $word;
137 else
139 $tmp_split_words[] = $word . ' ';
142 if ($phrase)
144 $tmp_split_words[] = $phrase;
147 $this->split_words = $tmp_split_words;
149 unset($tmp_split_words);
150 unset($phrase);
152 foreach ($this->split_words as $i => $word)
154 $clean_word = preg_replace('#^[+\-|"]#', '', $word);
156 // check word length
157 $clean_len = utf8_strlen(str_replace('*', '', $clean_word));
158 if (($clean_len < phpbb::$config['fulltext_mysql_min_word_len']) || ($clean_len > phpbb::$config['fulltext_mysql_max_word_len']))
160 $this->common_words[] = $word;
161 unset($this->split_words[$i]);
165 if ($terms == 'any')
167 $this->search_query = '';
168 foreach ($this->split_words as $word)
170 if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0) || (strpos($word, '|') === 0))
172 $word = substr($word, 1);
174 $this->search_query .= $word . ' ';
177 else
179 $this->search_query = '';
180 foreach ($this->split_words as $word)
182 if ((strpos($word, '+') === 0) || (strpos($word, '-') === 0))
184 $this->search_query .= $word . ' ';
186 else if (strpos($word, '|') === 0)
188 $this->search_query .= substr($word, 1) . ' ';
190 else
192 $this->search_query .= '+' . $word . ' ';
197 $this->search_query = utf8_htmlspecialchars($this->search_query);
199 if ($this->search_query)
201 $this->split_words = array_values($this->split_words);
202 sort($this->split_words);
203 return true;
205 return false;
209 * Turns text into an array of words
211 private function split_message($text)
213 // Split words
214 $text = preg_replace('#([^\p{L}\p{N}\'*])#u', '$1$1', str_replace('\'\'', '\' \'', trim($text)));
216 $matches = array();
217 preg_match_all('#(?:[^\p{L}\p{N}*]|^)([+\-|]?(?:[\p{L}\p{N}*]+\'?)*[\p{L}\p{N}*])(?:[^\p{L}\p{N}*]|$)#u', $text, $matches);
218 $text = $matches[1];
220 // remove too short or too long words
221 $text = array_values($text);
222 for ($i = 0, $n = sizeof($text); $i < $n; $i++)
224 $text[$i] = trim($text[$i]);
225 if (utf8_strlen($text[$i]) < phpbb::$config['fulltext_mysql_min_word_len'] || utf8_strlen($text[$i]) > phpbb::$config['fulltext_mysql_max_word_len'])
227 unset($text[$i]);
231 return array_values($text);
235 * Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
237 * @param string $type contains either posts or topics depending on what should be searched for
238 * @param string &$fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
239 * @param string &$terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
240 * @param array &$sort_by_sql contains SQL code for the ORDER BY part of a query
241 * @param string &$sort_key is the key of $sort_by_sql for the selected sorting
242 * @param string &$sort_dir is either a or d representing ASC and DESC
243 * @param string &$sort_days specifies the maximum amount of days a post may be old
244 * @param array &$ex_fid_ary specifies an array of forum ids which should not be searched
245 * @param array &$m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts
246 * @param int &$topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
247 * @param array &$author_ary an array of author ids if the author should be ignored during the search the array is empty
248 * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
249 * @param int $start indicates the first index of the page
250 * @param int $per_page number of ids each page is supposed to contain
251 * @return boolean|int total number of results
253 * @access public
255 public function keyword_search($type, &$fields, &$terms, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
257 // No keywords? No posts.
258 if (!$this->search_query)
260 return false;
263 // generate a search_key from all the options to identify the results
264 $search_key = md5(implode('#', array(
265 implode(', ', $this->split_words),
266 $type,
267 $fields,
268 $terms,
269 $sort_days,
270 $sort_key,
271 $topic_id,
272 implode(',', $ex_fid_ary),
273 implode(',', $m_approve_fid_ary),
274 implode(',', $author_ary)
275 )));
277 // try reading the results from cache
278 $result_count = 0;
279 if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == self::SEARCH_RESULT_IN_CACHE)
281 return $result_count;
284 $id_ary = array();
286 $join_topic = ($type == 'posts') ? false : true;
288 // Build sql strings for sorting
289 $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
290 $sql_sort_table = $sql_sort_join = '';
292 switch ($sql_sort[0])
294 case 'u':
295 $sql_sort_table = USERS_TABLE . ' u, ';
296 $sql_sort_join = ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
297 break;
299 case 't':
300 $join_topic = true;
301 break;
303 case 'f':
304 $sql_sort_table = FORUMS_TABLE . ' f, ';
305 $sql_sort_join = ' AND f.forum_id = p.forum_id ';
306 break;
309 // Build some display specific sql strings
310 switch ($fields)
312 case 'titleonly':
313 $sql_match = 'p.post_subject';
314 $sql_match_where = ' AND p.post_id = t.topic_first_post_id';
315 $join_topic = true;
316 break;
318 case 'msgonly':
319 $sql_match = 'p.post_text';
320 $sql_match_where = '';
321 break;
323 case 'firstpost':
324 $sql_match = 'p.post_subject, p.post_text';
325 $sql_match_where = ' AND p.post_id = t.topic_first_post_id';
326 $join_topic = true;
327 break;
329 default:
330 $sql_match = 'p.post_subject, p.post_text';
331 $sql_match_where = '';
332 break;
335 if (!sizeof($m_approve_fid_ary))
337 $m_approve_fid_sql = ' AND p.post_approved = 1';
339 else if ($m_approve_fid_ary === array(-1))
341 $m_approve_fid_sql = '';
343 else
345 $m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . phpbb::$db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
348 $sql_select = (!$result_count) ? 'SQL_CALC_FOUND_ROWS ' : '';
349 $sql_select = ($type == 'posts') ? $sql_select . 'p.post_id' : 'DISTINCT ' . $sql_select . 't.topic_id';
350 $sql_from = ($join_topic) ? TOPICS_TABLE . ' t, ' : '';
351 $field = ($type == 'posts') ? 'post_id' : 'topic_id';
352 $sql_author = (sizeof($author_ary) == 1) ? ' = ' . $author_ary[0] : 'IN (' . implode(', ', $author_ary) . ')';
354 $sql_where_options = $sql_sort_join;
355 $sql_where_options .= ($topic_id) ? ' AND p.topic_id = ' . $topic_id : '';
356 $sql_where_options .= ($join_topic) ? ' AND t.topic_id = p.topic_id' : '';
357 $sql_where_options .= (sizeof($ex_fid_ary)) ? ' AND ' . phpbb::$db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
358 $sql_where_options .= $m_approve_fid_sql;
359 $sql_where_options .= (sizeof($author_ary)) ? ' AND p.poster_id ' . $sql_author : '';
360 $sql_where_options .= ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
361 $sql_where_options .= $sql_match_where;
363 $sql = "SELECT $sql_select
364 FROM $sql_from$sql_sort_table" . POSTS_TABLE . " p
365 WHERE MATCH ($sql_match) AGAINST ('" . phpbb::$db->sql_escape(htmlspecialchars_decode($this->search_query)) . "' IN BOOLEAN MODE)
366 $sql_where_options
367 ORDER BY $sql_sort";
368 $result = phpbb::$db->sql_query_limit($sql, phpbb::$config['search_block_size'], $start);
370 while ($row = phpbb::$db->sql_fetchrow($result))
372 $id_ary[] = $row[$field];
374 phpbb::$db->sql_freeresult($result);
376 $id_ary = array_unique($id_ary);
378 if (!sizeof($id_ary))
380 return false;
383 // if the total result count is not cached yet, retrieve it from the db
384 if (!$result_count)
386 $sql = 'SELECT FOUND_ROWS() as result_count';
387 $result = phpbb::$db->sql_query($sql);
388 $result_count = (int) phpbb::$db->sql_fetchfield('result_count');
389 phpbb::$db->sql_freeresult($result);
391 if (!$result_count)
393 return false;
397 // store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
398 $this->save_ids($search_key, implode(' ', $this->split_words), $author_ary, $result_count, $id_ary, $start, $sort_dir);
399 $id_ary = array_slice($id_ary, 0, (int) $per_page);
401 return $result_count;
405 * Performs a search on an author's posts without caring about message contents. Depends on display specific params
407 * @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
408 * @param int $start indicates the first index of the page
409 * @param int $per_page number of ids each page is supposed to contain
410 * @return total number of results
412 public function author_search($type, $firstpost_only, &$sort_by_sql, &$sort_key, &$sort_dir, &$sort_days, &$ex_fid_ary, &$m_approve_fid_ary, &$topic_id, &$author_ary, &$id_ary, $start, $per_page)
414 // No author? No posts.
415 if (!sizeof($author_ary))
417 return 0;
420 // generate a search_key from all the options to identify the results
421 $search_key = md5(implode('#', array(
423 $type,
424 ($firstpost_only) ? 'firstpost' : '',
427 $sort_days,
428 $sort_key,
429 $topic_id,
430 implode(',', $ex_fid_ary),
431 implode(',', $m_approve_fid_ary),
432 implode(',', $author_ary)
433 )));
435 // try reading the results from cache
436 $result_count = 0;
437 if ($this->obtain_ids($search_key, $result_count, $id_ary, $start, $per_page, $sort_dir) == self::SEARCH_RESULT_IN_CACHE)
439 return $result_count;
442 $id_ary = array();
444 // Create some display specific sql strings
445 $sql_author = phpbb::$db->sql_in_set('p.poster_id', $author_ary);
446 $sql_fora = (sizeof($ex_fid_ary)) ? ' AND ' . phpbb::$db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
447 $sql_topic_id = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
448 $sql_time = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
449 $sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
451 // Build sql strings for sorting
452 $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
453 $sql_sort_table = $sql_sort_join = '';
454 switch ($sql_sort[0])
456 case 'u':
457 $sql_sort_table = USERS_TABLE . ' u, ';
458 $sql_sort_join = ($type == 'posts') ? ' AND u.user_id = p.poster_id ' : ' AND u.user_id = t.topic_poster ';
459 break;
461 case 't':
462 $sql_sort_table = ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
463 $sql_sort_join = ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
464 break;
466 case 'f':
467 $sql_sort_table = FORUMS_TABLE . ' f, ';
468 $sql_sort_join = ' AND f.forum_id = p.forum_id ';
469 break;
472 if (!sizeof($m_approve_fid_ary))
474 $m_approve_fid_sql = ' AND p.post_approved = 1';
476 else if ($m_approve_fid_ary == array(-1))
478 $m_approve_fid_sql = '';
480 else
482 $m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . phpbb::$db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
485 // If the cache was completely empty count the results
486 $calc_results = ($result_count) ? '' : 'SQL_CALC_FOUND_ROWS ';
488 // Build the query for really selecting the post_ids
489 if ($type == 'posts')
491 $sql = "SELECT {$calc_results}p.post_id
492 FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
493 WHERE $sql_author
494 $sql_topic_id
495 $sql_firstpost
496 $m_approve_fid_sql
497 $sql_fora
498 $sql_sort_join
499 $sql_time
500 ORDER BY $sql_sort";
501 $field = 'post_id';
503 else
505 $sql = "SELECT {$calc_results}t.topic_id
506 FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
507 WHERE $sql_author
508 $sql_topic_id
509 $sql_firstpost
510 $m_approve_fid_sql
511 $sql_fora
512 AND t.topic_id = p.topic_id
513 $sql_sort_join
514 $sql_time
515 GROUP BY t.topic_id
516 ORDER BY $sql_sort";
517 $field = 'topic_id';
520 // Only read one block of posts from the db and then cache it
521 $result = phpbb::$db->sql_query_limit($sql, phpbb::$config['search_block_size'], $start);
523 while ($row = phpbb::$db->sql_fetchrow($result))
525 $id_ary[] = $row[$field];
527 phpbb::$db->sql_freeresult($result);
529 // retrieve the total result count if needed
530 if (!$result_count)
532 $sql = 'SELECT FOUND_ROWS() as result_count';
533 $result = phpbb::$db->sql_query($sql);
534 $result_count = (int) phpbb::$db->sql_fetchfield('result_count');
535 phpbb::$db->sql_freeresult($result);
537 if (!$result_count)
539 return false;
543 if (sizeof($id_ary))
545 $this->save_ids($search_key, '', $author_ary, $result_count, $id_ary, $start, $sort_dir);
546 $id_ary = array_slice($id_ary, 0, $per_page);
548 return $result_count;
550 return false;
554 * Destroys cached search results, that contained one of the new words in a post so the results won't be outdated.
556 * @param string $mode contains the post mode: edit, post, reply, quote ...
558 public function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
560 // Split old and new post/subject to obtain array of words
561 $split_text = $this->split_message($message);
562 $split_title = ($subject) ? $this->split_message($subject) : array();
564 $words = array_unique(array_merge($split_text, $split_title));
566 unset($split_text);
567 unset($split_title);
569 // destroy cached search results containing any of the words removed or added
570 $this->destroy_cache($words, array($poster_id));
572 unset($words);
576 * Destroy cached results, that might be outdated after deleting a post
578 public function index_remove($post_ids, $author_ids, $forum_ids)
580 $this->destroy_cache(array(), $author_ids);
584 * Destroy old cache entries
586 public function tidy()
588 // destroy too old cached search results
589 $this->destroy_cache(array());
591 set_config('search_last_gc', time(), true);
595 * Create fulltext index
597 public function create_index($acp_module, $u_action)
599 // Make sure we can actually use MySQL with fulltext indexes
600 if ($error = $this->init())
602 return $error;
605 if (empty($this->stats))
607 $this->get_stats();
610 $alter = array();
612 if (!isset($this->stats['post_subject']))
614 //$alter[] = 'MODIFY post_subject varchar(100) COLLATE utf8_unicode_ci DEFAULT \'\' NOT NULL';
615 $alter[] = 'ADD FULLTEXT (post_subject)';
618 if (!isset($this->stats['post_text']))
620 $alter[] = 'MODIFY post_text mediumtext COLLATE utf8_unicode_ci NOT NULL';
621 $alter[] = 'ADD FULLTEXT (post_text)';
624 if (!isset($this->stats['post_content']))
626 $alter[] = 'ADD FULLTEXT post_content (post_subject, post_text)';
629 if (sizeof($alter))
631 phpbb::$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
634 phpbb::$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
636 return false;
640 * Drop fulltext index
642 public function delete_index($acp_module, $u_action)
644 // Make sure we can actually use MySQL with fulltext indexes
645 if ($error = $this->init())
647 return $error;
650 if (empty($this->stats))
652 $this->get_stats();
655 $alter = array();
657 if (isset($this->stats['post_subject']))
659 $alter[] = 'DROP INDEX post_subject';
662 if (isset($this->stats['post_text']))
664 $alter[] = 'DROP INDEX post_text';
667 if (isset($this->stats['post_content']))
669 $alter[] = 'DROP INDEX post_content';
672 if (sizeof($alter))
674 phpbb::$db->sql_query('ALTER TABLE ' . POSTS_TABLE . ' ' . implode(', ', $alter));
677 phpbb::$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
679 return false;
683 * Returns true if both FULLTEXT indexes exist
685 public function index_created()
687 if (empty($this->stats))
689 $this->get_stats();
692 return (isset($this->stats['post_text']) && isset($this->stats['post_subject']) && isset($this->stats['post_content'])) ? true : false;
696 * Returns an associative array containing information about the indexes
698 public function index_stats()
700 if (empty($this->stats))
702 $this->get_stats();
705 return array(
706 phpbb::$user->lang['FULLTEXT_MYSQL_TOTAL_POSTS'] => ($this->index_created()) ? $this->stats['total_posts'] : 0,
710 private function get_stats()
712 if (phpbb::$db->dbms_type !== 'mysql')
714 $this->stats = array();
715 return;
718 $sql = 'SHOW INDEX
719 FROM ' . POSTS_TABLE;
720 $result = phpbb::$db->sql_query($sql);
722 while ($row = phpbb::$db->sql_fetchrow($result))
724 // deal with older MySQL versions which didn't use Index_type
725 $index_type = (isset($row['Index_type'])) ? $row['Index_type'] : $row['Comment'];
727 if ($index_type == 'FULLTEXT')
729 if ($row['Key_name'] == 'post_text')
731 $this->stats['post_text'] = $row;
733 else if ($row['Key_name'] == 'post_subject')
735 $this->stats['post_subject'] = $row;
737 else if ($row['Key_name'] == 'post_content')
739 $this->stats['post_content'] = $row;
743 phpbb::$db->sql_freeresult($result);
745 $sql = 'SELECT COUNT(post_id) as total_posts
746 FROM ' . POSTS_TABLE;
747 $result = phpbb::$db->sql_query($sql);
748 $this->stats['total_posts'] = (int) phpbb::$db->sql_fetchfield('total_posts');
749 phpbb::$db->sql_freeresult($result);
753 * Display nothing, we force UTF-8 support in all versions of PHP
755 function acp()
757 $tpl = '';
759 // These are fields required in the config table
760 return array(
761 'tpl' => $tpl,
762 'config' => array()