MDL-27718 Files embedded into the course summary and section summaries are migrated...
[moodle.git] / search / documents / physical_htm.php
blobe96e066f6d62a8e0a1405942a10ea203e9ca7b9a
1 <?php
2 /**
3 * Global Search Engine for Moodle
5 * @package search
6 * @category core
7 * @subpackage document_wrappers
8 * @author Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
9 * @date 2008/03/31
10 * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
11 * @version revised for Moodle 2.0
13 * this is a format handler for getting text out of HTML (.htm) files
14 * so it can be indexed by the Lucene search engine
/**
 * Extracts indexable plain text from an HTML (.htm) file stored under the Moodle data root.
 *
 * The content of every keywords/description meta tag is kept as ordinary text,
 * HTML comments and all remaining tags are removed, and entities are decoded.
 *
 * @param object $resource resource record; its course and reference fields locate the file when $directfile is empty
 * @param string $directfile if the resource is given as a direct file path (relative to the data root), use it as reference to the file
 * @uses $CFG
 * @return string|null the extracted text ('' when the file cannot be read), or null when the
 *                     current user lacks the moodle/site:config capability
 */
function get_text_for_indexing_htm(&$resource, $directfile = ''){
    global $CFG;

    // SECURITY : do not allow non admin execute anything on system !!
    if (!has_capability('moodle/site:config', get_context_instance(CONTEXT_SYSTEM))) return;

    // locate the file, either through the resource record or the direct path
    if ($directfile == ''){
        $path = "{$CFG->dataroot}/{$resource->course}/{$resource->reference}";
    } else {
        $path = "{$CFG->dataroot}/{$directfile}";
    }

    // just get text; a missing or unreadable file yields nothing to index instead of a PHP error
    if (!is_readable($path)){
        return '';
    }
    $text = file_get_contents($path);
    if ($text === false){
        return '';
    }

    // extract keywords and other interesting meta information and put it back as real content for indexing
    // every meta tag is examined (not only the last one), and a tag is only replaced
    // when it really carries a content attribute
    if (preg_match_all('/<meta ([^>]*)>/is', $text, $metatags, PREG_SET_ORDER)){
        foreach ($metatags as $metatag){
            if (!preg_match('/name="(keywords|description)"/i', $metatag[1])){
                continue;
            }
            if (!preg_match('/content="([^"]+)"/i', $metatag[1], $content)){
                continue;
            }
            $text = str_replace($metatag[0], ' '.$content[1].' ', $text);
        }
    }

    // brutally filters all html tags
    // comments go first and as a whole: once the generic tag filter has run, a comment
    // containing '>' would already be cut in half and its tail would leak into the index
    $stripped = preg_replace(array('/<!--.*?-->/s', '/<[^>]*>/'), '', $text);
    if ($stripped !== null){ // null signals a PCRE failure; keep the unfiltered text in that case
        $text = $stripped;
    }

    // normalise to UTF-8 before decoding entities: html_entity_decode() emits UTF-8 bytes,
    // so decoding first would mix two encodings in legacy-encoded documents
    $converted = mb_convert_encoding($text, 'UTF-8', 'auto');
    if ($converted !== false){
        $text = $converted;
    }
    $text = html_entity_decode($text, ENT_COMPAT, 'UTF-8');

    if (!empty($CFG->block_search_limit_index_body)){
        $text = shorten_text($text, $CFG->block_search_limit_index_body);
    }
    return $text;
}