Bumping version for 1.8.9 release. Thinking we might have to go to x.x.10 for the...
[moodle.git] / lib / filterlib.php
blob2942ce6e9e06000a51b18b1ef308c22d761d02b8
1 <?php // $Id$
2 // Contains special functions that are particularly useful to filters
/// Exclusive separator embedded in the keys under which the temporarily
/// saved tags are stored. It has to be rare enough that it never matches
/// anything coming from the filterobjects. MDL-18165
define('EXCL_SEPARATOR', '-%-');
/**
 * This is just a little object to define a phrase and some instructions
 * for how to process it.  Filters can create an array of these to pass
 * to the filter_phrases function below.
 *
 * The plain properties hold what the caller asked for; the work_* properties
 * are a cache of the prepared values, filled in by filter_phrases() the first
 * time the object is processed (work_calculated tells whether that happened).
 **/
class filterobject {
    var $phrase;
    var $hreftagbegin;
    var $hreftagend;
    var $casesensitive;
    var $fullmatch;
    var $replacementphrase;
    var $work_phrase;
    var $work_hreftagbegin;
    var $work_hreftagend;
    var $work_casesensitive;
    var $work_fullmatch;
    var $work_replacementphrase;
    var $work_calculated;

    /**
     * Constructor.
     *
     * param phrase            the phrase to look for in the text
     * param hreftagbegin      HTML inserted just before each match
     * param hreftagend        HTML inserted just after each match
     * param casesensitive     whether the phrase must match case sensitively
     * param fullmatch         whether only whole-word matches count
     * param replacementphrase text to show instead of the matched phrase (NULL keeps the match)
     */
    function __construct($phrase, $hreftagbegin='<span class="highlight">',
                                  $hreftagend='</span>',
                                  $casesensitive=false,
                                  $fullmatch=false,
                                  $replacementphrase=NULL) {

        $this->phrase           = $phrase;
        $this->hreftagbegin     = $hreftagbegin;
        $this->hreftagend       = $hreftagend;
        $this->casesensitive    = $casesensitive;
        $this->fullmatch        = $fullmatch;
        $this->replacementphrase= $replacementphrase;
        $this->work_calculated  = false;
    }

    /**
     * Old style (class-named) constructor.
     *
     * A method named after the class is no longer used as the constructor by
     * PHP 8, so the real initialisation lives in __construct(). This wrapper
     * stays for engines that only know the old convention and for any code
     * calling it explicitly.
     */
    function filterobject($phrase, $hreftagbegin='<span class="highlight">',
                                   $hreftagend='</span>',
                                   $casesensitive=false,
                                   $fullmatch=false,
                                   $replacementphrase=NULL) {

        $this->__construct($phrase, $hreftagbegin, $hreftagend, $casesensitive, $fullmatch, $replacementphrase);
    }
}
/**
 * Process phrases intelligently found within a HTML text (such as adding links)
 *
 * Everything enclosed by the ignore tags, and every HTML tag, is swapped for a
 * placeholder first, so phrases are only looked for in the remaining text.
 * The placeholders are put back before the text is returned.
 *
 * param  text             the text that we are filtering
 * param  link_array       an array of filterobjects (by reference: the work_* values
 *                         calculated here are stored on the objects for later calls)
 * param  ignoretagsopen   an array of opening tags that we should ignore while filtering
 * param  ignoretagsclose  an array of corresponding closing tags
 * return the filtered text
 **/
function filter_phrases ($text, &$link_array, $ignoretagsopen=NULL, $ignoretagsclose=NULL) {

    global $CFG;

/// Phrases already linked during this request (used with $CFG->filtermatchoneperpage)
    static $usedphrases = array();

    $ignoretags = array();  //To store all the enclosig tags to be completely ignored
    $tags = array();        //To store all the simple tags to be ignored

/// A list of open/close tags that we should not replace within
/// No reason why you can't put full preg expressions in here too
/// eg '<script(.+?)>' to match any type of script tag
    $filterignoretagsopen  = array('<head>' , '<nolink>' , '<span class="nolink">');
    $filterignoretagsclose = array('</head>', '</nolink>', '</span>');

/// Invalid prefixes and suffixes for the fullmatch searches
/// Every "word" character, but the underscore, is a invalid suffix or prefix.
/// (nice to use this because it includes national characters (accents...) as word characters.
    $filterinvalidprefixes = '([^\W_])';
    $filterinvalidsuffixes = '([^\W_])';

/// Add the user defined ignore tags to the default list
/// Unless specified otherwise, we will not replace within <a></a> tags
    if ( $ignoretagsopen === NULL ) {
        //$ignoretagsopen  = array('<a(.+?)>');
        $ignoretagsopen  = array('<a[^>]+?>');
        $ignoretagsclose = array('</a>');
    }

    if ( is_array($ignoretagsopen) ) {
        foreach ($ignoretagsopen as $open) $filterignoretagsopen[] = $open;
        foreach ($ignoretagsclose as $close) $filterignoretagsclose[] = $close;
    }

//// Double up some magic chars to avoid "accidental matches"
//// (they are used to build the placeholders, see below)
    $text = preg_replace('/([#*%])/','\1\1',$text);

////Remove everything enclosed by the ignore tags from $text
    filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);

/// Remove tags from $text
    filter_save_tags($text,$tags);

/// With any of the "match once" settings only the first occurrence is replaced
    $onlyfirst = (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage));
    $limit     = $onlyfirst ? 1 : -1;

/// Time to cycle through each phrase to be linked
/// (walking the real keys, so arrays that are not indexed 0..n-1 are handled
/// completely and taking the reference never creates empty elements)
    foreach (array_keys($link_array) as $n) {
        $linkobject =& $link_array[$n];
        $notfullmatches = array();

    /// Set some defaults if certain properties are missing
    /// Properties may be missing if the filterobject class has not been used to construct the object
        if (empty($linkobject->phrase)) {
            continue;
        }

    /// Avoid integers < 1000 to be linked. See bug 1446.
        $intcurrent = intval($linkobject->phrase);
        if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
            continue;
        }

    /// All this work has to be done ONLY it it hasn't been done before
        if (empty($linkobject->work_calculated)) {
            if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
                $linkobject->work_hreftagbegin = '<span class="highlight">';
                $linkobject->work_hreftagend   = '</span>';
            } else {
                $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
                $linkobject->work_hreftagend   = $linkobject->hreftagend;
            }

        /// Double up chars to protect true duplicates -- the doubles will
        /// be cleared up before returning to the user. Both tags end up in
        /// the text, so both need the protection.
            $linkobject->work_hreftagbegin = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagbegin);
            $linkobject->work_hreftagend   = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagend);

            $linkobject->work_casesensitive = !empty($linkobject->casesensitive);
            $linkobject->work_fullmatch     = !empty($linkobject->fullmatch);

        /// Strip tags out of the phrase
            $linkobject->work_phrase = strip_tags($linkobject->phrase);

        /// Double up chars that might cause a false match -- the duplicates will
        /// be cleared up before returning to the user.
            $linkobject->work_phrase = preg_replace('/([#*%])/','\1\1',$linkobject->work_phrase);

        /// Set the replacement phrase properly
            if (!empty($linkobject->replacementphrase)) {    //We have specified a replacement phrase
            /// Strip tags and protect the magic chars like everything else inserted in the text
                $linkobject->work_replacementphrase = preg_replace('/([#*%])/','\1\1',
                        strip_tags($linkobject->replacementphrase));
            } else {                                 //The replacement is the original phrase as matched below
                $linkobject->work_replacementphrase = '$1';
            }

        /// Quote any regular expression characters and the delimiter in the work phrase to be searched
            $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');

        /// Work calculated
            $linkobject->work_calculated = true;
        }

    /// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
        if (!empty($CFG->filtermatchoneperpage)) {
            if (in_array($linkobject->work_phrase, $usedphrases, true)) {
                continue;
            }
        }

    /// Regular expression modifiers
        $modifiers = ($linkobject->work_casesensitive) ? 's' : 'isu'; // works in unicode mode!

    /// Do we need to do a fullmatch?
    /// If yes then go through and hide any non full matching entries
    /// behind <*n*> placeholders until this phrase has been processed
        if ($linkobject->work_fullmatch) {
            $regexp = '/'.$filterinvalidprefixes.'('.$linkobject->work_phrase.')|('.$linkobject->work_phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;

            preg_match_all($regexp,$text,$list_of_notfullmatches);

            if (!empty($list_of_notfullmatches[0])) {
                foreach (array_unique($list_of_notfullmatches[0]) as $key=>$value) {
                    $notfullmatches['<*'.$key.'*>'] = $value;
                }
                if (!empty($notfullmatches)) {
                    $text = str_replace($notfullmatches,array_keys($notfullmatches),$text);
                }
            }
        }

    /// Finally we do our highlighting
        $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
                                   $linkobject->work_hreftagbegin.
                                   $linkobject->work_replacementphrase.
                                   $linkobject->work_hreftagend, $text, $limit);

    /// If the text has changed we have to look for links again.
    /// preg_replace() gives NULL when the expression engine fails; in that
    /// case the text is kept as it was instead of being thrown away.
        if (is_string($resulttext) && $resulttext !== $text) {
        /// Set $text to $resulttext
            $text = $resulttext;
        /// Remove everything enclosed by the ignore tags from $text
            filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
        /// Remove tags from $text
            filter_save_tags($text,$tags);
        /// If $CFG->filtermatchoneperpage, save linked phrases to request
            if (!empty($CFG->filtermatchoneperpage)) {
                $usedphrases[] = $linkobject->work_phrase;
            }
        }

    /// Replace the not full matches before cycling to next link object
        if (!empty($notfullmatches)) {
            $text = str_replace(array_keys($notfullmatches),$notfullmatches,$text);
        }
    }
    unset($linkobject); // do not leave a reference into the caller's array behind

/// Rebuild the text with all the excluded areas

    if (!empty($tags)) {
        $text = str_replace(array_keys($tags), $tags, $text);
    }

    if (!empty($ignoretags)) {
        $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
        $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
    }

//// Remove the protective doubleups
    $text = preg_replace('/([#*%])(\1)/','\1',$text);

/// Add missing javascript for popus
    $text = filter_add_javascript($text);

    return $text;
}
/**
 * Remove the duplicated phrases from an array of filterobjects.
 *
 * The first object found for each phrase is kept. Case sensitive objects are
 * compared against the exact phrases already kept, all the others against
 * their lower case versions.
 *
 * param  linkarray an array of filterobjects
 * return a new (0-indexed) array with the objects that were kept, in order
 */
function filter_remove_duplicates($linkarray) {

    $concepts  = array(); // keep a record of concepts as we cycle through (phrases are the keys)
    $lconcepts = array(); // a lower case version for case insensitive

    $cleanlinks = array();

    foreach ($linkarray as $filterobject) {
        $phrase  = (string)$filterobject->phrase;
        $lphrase = (string)moodle_strtolower($phrase);

    /// Looking up keys compares the phrases exactly ("10" and "1e1" stay
    /// different) and avoids searching the whole list for every object
        if (!empty($filterobject->casesensitive)) {
            $exists = isset($concepts[$phrase]);
        } else {
            $exists = isset($lconcepts[$lphrase]);
        }

        if (!$exists) {
            $cleanlinks[] = $filterobject;
            $concepts[$phrase]   = true;
            $lconcepts[$lphrase] = true;
        }
    }

    return $cleanlinks;
}
/**
 * Extract open/close tags and their contents to avoid being processed by filters.
 * Useful to extract pieces of code like <a>...</a> tags. It returns the text
 * converted with some <#xEXCL_SEPARATORx#> codes replacing the extracted text. Such extracted
 * texts are returned in the ignoretags array (as values), with codes as keys.
 *
 * Opening tags without a closing tag at the same position of
 * filterignoretagsclose are skipped, as they cannot delimit anything.
 *
 * param text                  the text that we are filtering (in/out)
 * param filterignoretagsopen  an array of open tags to start searching
 * param filterignoretagsclose an array of close tags to end searching
 * param ignoretags            an array of saved strings useful to rebuild the original text (in/out)
 **/
function filter_save_ignore_tags(&$text,$filterignoretagsopen,$filterignoretagsclose,&$ignoretags) {

/// Remove everything enclosed by the ignore tags from $text
    foreach ($filterignoretagsopen as $ikey=>$opentag) {
        if (isset($filterignoretagsclose[$ikey])) {
            $closetag = $filterignoretagsclose[$ikey];
        /// form regular expression
            $opentag  = str_replace('/','\/',$opentag); // delimit forward slashes
            $closetag = str_replace('/','\/',$closetag); // delimit forward slashes
            $pregexp = '/'.$opentag.'(.+?)'.$closetag.'/is';

        /// preg_match_all() gives 0 (nothing found) or false (failure): nothing to save then
            if (preg_match_all($pregexp, $text, $list_of_ignores)) {
                foreach (array_unique($list_of_ignores[0]) as $key=>$value) {
                /// The number of codes saved so far keeps the new codes different
                /// from the ones saved for other tags or in previous calls
                    $prefix = (string)(count($ignoretags) + 1);
                    $ignoretags['<#'.$prefix.EXCL_SEPARATOR.$key.'#>'] = $value;
                }
            }
        }
        if (!empty($ignoretags)) {
            $text = str_replace($ignoretags,array_keys($ignoretags),$text);
        }
    }
}
/**
 * Pull every tag (any text enclosed by < and >) out of the text so that
 * filters do not touch it. Each distinct tag is swapped for a
 * <%xEXCL_SEPARATORx%> code; the codes (keys) and the tags they stand for
 * (values) are accumulated in the tags array. Anything starting with
 * <#, <% or <* is left alone.
 *
 * param text the text that we are filtering (in/out)
 * param tags an array of saved strings useful to rebuild the original text (in/out)
 **/
function filter_save_tags(&$text,&$tags) {

    $found = array();
    preg_match_all('/<([^#%*].*?)>/is', $text, $found);

    $distinct = array_unique($found[0]);
    foreach ($distinct as $position => $markup) {
    /// Number of codes already stored + 1, then the position of the match
        $serial = count($tags) + 1;
        $code   = '<%'.$serial.EXCL_SEPARATOR.$position.'%>';
        $tags[$code] = $markup;
    }

    if (empty($tags)) {
        return;
    }

    $text = str_replace($tags, array_keys($tags), $text);
}
/**
 * Add missing openpopup javascript to HTML files.
 *
 * Nothing is done unless the text contains a closing html tag and at least
 * one onclick="return openpopup... attribute. The script is added before
 * </head> when there is one; otherwise a head element holding the script
 * is added right after the opening html tag.
 *
 * param  text the text to check
 * return the text, with the openpopup() javascript function added when needed
 **/
function filter_add_javascript($text) {
    global $CFG;

    if (stripos($text, '</html>') === FALSE) {
        return $text; // this is not a html file
    }
    if (strpos($text, 'onclick="return openpopup') === FALSE) {
        return $text; // no popup - no need to add javascript
    }
    $js ="
    <script type=\"text/javascript\">
    <!--
        function openpopup(url,name,options,fullscreen) {
          fullurl = \"".$CFG->httpswwwroot."\" + url;
          windowobj = window.open(fullurl,name,options);
          if (fullscreen) {
            windowobj.moveTo(0,0);
            windowobj.resizeTo(screen.availWidth,screen.availHeight);
          }
          windowobj.focus();
          return false;
        }
    // -->
    </script>";
    if (stripos($text, '</head>') !== FALSE) {
        //try to add it into the head element
        $text = str_ireplace('</head>', $js.'</head>', $text);
        return $text;
    }

    //last chance - try adding head element
    //The script goes into a preg replacement string, where \ and $ introduce
    //backreferences: escape them so the script is inserted literally
    $replacement = '\\0<head>'.addcslashes($js, '\\$').'</head>';
    $result = preg_replace("/<html.*?>/is", $replacement, $text);

    //preg_replace() gives NULL on failure - keep the original text then
    return ($result === NULL) ? $text : $result;
}