lib/filterlib.php

   1 <?php // $Id$
   2       // Contains special functions that are particularly useful to filters
   3
   4
   5 /// Define one exclusive separator that we'll use in the temp saved tags
   6 /// keys. It must be something rare enough to avoid having matches with
   7 /// filterobjects. MDL-18165
   8 define ('EXCL_SEPARATOR', '-%-');
   9
  10 /**
  11  * This is just a little object to define a phrase and some instructions
  12  * for how to process it.  Filters can create an array of these to pass
  13  * to the filter_phrases function below.
  14  **/
  15 class filterobject {
  16     var $phrase;
  17     var $hreftagbegin;
  18     var $hreftagend;
  19     var $casesensitive;
  20     var $fullmatch;
  21     var $replacementphrase;
  22     var $work_phrase;
  23     var $work_hreftagbegin;
  24     var $work_hreftagend;
  25     var $work_casesensitive;
  26     var $work_fullmatch;
  27     var $work_replacementphrase;
  28     var $work_calculated;
  29
  30     /// a constructor just because I like constructing
  31     function filterobject($phrase, $hreftagbegin='<span class="highlight">',
  32                                    $hreftagend='</span>',
  33                                    $casesensitive=false,
  34                                    $fullmatch=false,
  35                                    $replacementphrase=NULL) {
  36
  37         $this->phrase           = $phrase;
  38         $this->hreftagbegin     = $hreftagbegin;
  39         $this->hreftagend       = $hreftagend;
  40         $this->casesensitive    = $casesensitive;
  41         $this->fullmatch        = $fullmatch;
  42         $this->replacementphrase= $replacementphrase;
  43         $this->work_calculated  = false;
  44
  45     }
  46 }
  47
  48 /**
  49  * Process phrases intelligently found within a HTML text (such as adding links)
  50  *
  51  * param  text             the text that we are filtering
  52  * param  link_array       an array of filterobjects
  53  * param  ignoretagsopen   an array of opening tags that we should ignore while filtering
  54  * param  ignoretagsclose  an array of corresponding closing tags
  55  **/
  56 function filter_phrases ($text, &$link_array, $ignoretagsopen=NULL, $ignoretagsclose=NULL) {
  57
  58     global $CFG;
  59
  60     static $usedphrases;
  61
  62     $ignoretags = array();  //To store all the enclosig tags to be completely ignored
  63     $tags = array();        //To store all the simple tags to be ignored
  64
  65 /// A list of open/close tags that we should not replace within
  66 /// No reason why you can't put full preg expressions in here too
  67 /// eg '<script(.+?)>' to match any type of script tag
  68     $filterignoretagsopen  = array('<head>' , '<nolink>' , '<span class="nolink">');
  69     $filterignoretagsclose = array('</head>', '</nolink>', '</span>');
  70
  71 /// Invalid prefixes and suffixes for the fullmatch searches
  72 /// Every "word" character, but the underscore, is a invalid suffix or prefix.
  73 /// (nice to use this because it includes national characters (accents...) as word characters.
  74     $filterinvalidprefixes = '([^\W_])';
  75     $filterinvalidsuffixes = '([^\W_])';
  76
  77 /// Add the user defined ignore tags to the default list
  78 /// Unless specified otherwise, we will not replace within <a></a> tags
  79     if ( $ignoretagsopen === NULL ) {
  80         //$ignoretagsopen  = array('<a(.+?)>');
  81         $ignoretagsopen  = array('<a[^>]+?>');
  82         $ignoretagsclose = array('</a>');
  83     }
  84
  85     if ( is_array($ignoretagsopen) ) {
  86         foreach ($ignoretagsopen as $open) $filterignoretagsopen[] = $open;
  87         foreach ($ignoretagsclose as $close) $filterignoretagsclose[] = $close;
  88     }
  89
  90     //// Double up some magic chars to avoid "accidental matches"
  91     $text = preg_replace('/([#*%])/','\1\1',$text);
  92
  93
  94 ////Remove everything enclosed by the ignore tags from $text
  95     filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
  96
  97 /// Remove tags from $text
  98     filter_save_tags($text,$tags);
  99
 100 /// Time to cycle through each phrase to be linked
 101     $size = sizeof($link_array);
 102     for ($n=0; $n < $size; $n++) {
 103         $linkobject =& $link_array[$n];
 104
 105     /// Set some defaults if certain properties are missing
 106     /// Properties may be missing if the filterobject class has not been used to construct the object
 107         if (empty($linkobject->phrase)) {
 108             continue;
 109         }
 110
 111     /// Avoid integers < 1000 to be linked. See bug 1446.
 112         $intcurrent = intval($linkobject->phrase);
 113         if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
 114             continue;
 115         }
 116
 117     /// All this work has to be done ONLY it it hasn't been done before
 118     if (!$linkobject->work_calculated) {
 119             if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
 120                 $linkobject->work_hreftagbegin = '<span class="highlight"';
 121                 $linkobject->work_hreftagend   = '</span>';
 122             } else {
 123                 $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
 124                 $linkobject->work_hreftagend   = $linkobject->hreftagend;
 125             }
 126
 127         /// Double up chars to protect true duplicates
 128         /// be cleared up before returning to the user.
 129             $linkobject->work_hreftagbegin = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagbegin);
 130
 131             if (empty($linkobject->casesensitive)) {
 132                 $linkobject->work_casesensitive = false;
 133             } else {
 134                 $linkobject->work_casesensitive = true;
 135             }
 136             if (empty($linkobject->fullmatch)) {
 137                 $linkobject->work_fullmatch = false;
 138             } else {
 139                 $linkobject->work_fullmatch = true;
 140             }
 141
 142         /// Strip tags out of the phrase
 143             $linkobject->work_phrase = strip_tags($linkobject->phrase);
 144
 145         /// Double up chars that might cause a false match -- the duplicates will
 146         /// be cleared up before returning to the user.
 147             $linkobject->work_phrase = preg_replace('/([#*%])/','\1\1',$linkobject->work_phrase);
 148
 149         /// Set the replacement phrase properly
 150             if ($linkobject->replacementphrase) {    //We have specified a replacement phrase
 151             /// Strip tags
 152                 $linkobject->work_replacementphrase = strip_tags($linkobject->replacementphrase);
 153             } else {                                 //The replacement is the original phrase as matched below
 154                 $linkobject->work_replacementphrase = '$1';
 155             }
 156
 157         /// Quote any regular expression characters and the delimiter in the work phrase to be searched
 158             $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');
 159
 160         /// Work calculated
 161             $linkobject->work_calculated = true;
 162
 163         }
 164
 165     /// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
 166         if (!empty($CFG->filtermatchoneperpage)) {
 167             if (!empty($usedphrases) && in_array($linkobject->work_phrase,$usedphrases)) {
 168                 continue;
 169             }
 170         }
 171
 172     /// Regular expression modifiers
 173         $modifiers = ($linkobject->work_casesensitive) ? 's' : 'isu'; // works in unicode mode!
 174
 175     /// Do we need to do a fullmatch?
 176     /// If yes then go through and remove any non full matching entries
 177         if ($linkobject->work_fullmatch) {
 178             $notfullmatches = array();
 179             $regexp = '/'.$filterinvalidprefixes.'('.$linkobject->work_phrase.')|('.$linkobject->work_phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;
 180
 181             preg_match_all($regexp,$text,$list_of_notfullmatches);
 182
 183             if ($list_of_notfullmatches) {
 184                 foreach (array_unique($list_of_notfullmatches[0]) as $key=>$value) {
 185                     $notfullmatches['<*'.$key.'*>'] = $value;
 186                 }
 187                 if (!empty($notfullmatches)) {
 188                     $text = str_replace($notfullmatches,array_keys($notfullmatches),$text);
 189                 }
 190             }
 191         }
 192
 193     /// Finally we do our highlighting
 194         if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
 195             $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
 196                                       $linkobject->work_hreftagbegin.
 197                                       $linkobject->work_replacementphrase.
 198                                       $linkobject->work_hreftagend, $text, 1);
 199         } else {
 200             $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
 201                                       $linkobject->work_hreftagbegin.
 202                                       $linkobject->work_replacementphrase.
 203                                       $linkobject->work_hreftagend, $text);
 204         }
 205
 206
 207     /// If the text has changed we have to look for links again
 208         if ($resulttext != $text) {
 209         /// Set $text to $resulttext
 210             $text = $resulttext;
 211         /// Remove everything enclosed by the ignore tags from $text
 212             filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
 213         /// Remove tags from $text
 214             filter_save_tags($text,$tags);
 215         /// If $CFG->filtermatchoneperpage, save linked phrases to request
 216             if (!empty($CFG->filtermatchoneperpage)) {
 217                 $usedphrases[] = $linkobject->work_phrase;
 218             }
 219         }
 220
 221
 222     /// Replace the not full matches before cycling to next link object
 223         if (!empty($notfullmatches)) {
 224             $text = str_replace(array_keys($notfullmatches),$notfullmatches,$text);
 225             unset($notfullmatches);
 226         }
 227     }
 228
 229 /// Rebuild the text with all the excluded areas
 230
 231     if (!empty($tags)) {
 232         $text = str_replace(array_keys($tags), $tags, $text);
 233     }
 234
 235     if (!empty($ignoretags)) {
 236         $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
 237         $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
 238     }
 239
 240     //// Remove the protective doubleups
 241     $text =  preg_replace('/([#*%])(\1)/','\1',$text);
 242
 243 /// Add missing javascript for popus
 244     $text = filter_add_javascript($text);
 245
 246
 247     return $text;
 248
 249 }
 250
 251
 252
 253 function filter_remove_duplicates($linkarray) {
 254
 255     $concepts  = array(); // keep a record of concepts as we cycle through
 256     $lconcepts = array(); // a lower case version for case insensitive
 257
 258     $cleanlinks = array();
 259
 260     foreach ($linkarray as $key=>$filterobject) {
 261         if ($filterobject->casesensitive) {
 262             $exists = in_array($filterobject->phrase, $concepts);
 263         } else {
 264             $exists = in_array(moodle_strtolower($filterobject->phrase), $lconcepts);
 265         }
 266
 267         if (!$exists) {
 268             $cleanlinks[] = $filterobject;
 269             $concepts[] = $filterobject->phrase;
 270             $lconcepts[] = moodle_strtolower($filterobject->phrase);
 271         }
 272     }
 273
 274     return $cleanlinks;
 275 }
 276
 277 /**
 278  * Extract open/lose tags and their contents to avoid being processed by filters.
 279  * Useful to extract pieces of code like <a>...</a> tags. It returns the text
 280  * converted with some <#xEXCL_SEPARATORx#> codes replacing the extracted text. Such extracted
 281  * texts are returned in the ignoretags array (as values), with codes as keys.
 282  *
 283  * param  text                  the text that we are filtering (in/out)
 284  * param  filterignoretagsopen  an array of open tags to start searching
 285  * param  filterignoretagsclose an array of close tags to end searching
 286  * param  ignoretags            an array of saved strings useful to rebuild the original text (in/out)
 287  **/
 288 function filter_save_ignore_tags(&$text,$filterignoretagsopen,$filterignoretagsclose,&$ignoretags) {
 289
 290 /// Remove everything enclosed by the ignore tags from $text
 291     foreach ($filterignoretagsopen as $ikey=>$opentag) {
 292         $closetag = $filterignoretagsclose[$ikey];
 293     /// form regular expression
 294         $opentag  = str_replace('/','\/',$opentag); // delimit forward slashes
 295         $closetag = str_replace('/','\/',$closetag); // delimit forward slashes
 296         $pregexp = '/'.$opentag.'(.+?)'.$closetag.'/is';
 297
 298         preg_match_all($pregexp, $text, $list_of_ignores);
 299         foreach (array_unique($list_of_ignores[0]) as $key=>$value) {
 300             $prefix = (string)(count($ignoretags) + 1);
 301             $ignoretags['<#'.$prefix.EXCL_SEPARATOR.$key.'#>'] = $value;
 302         }
 303         if (!empty($ignoretags)) {
 304             $text = str_replace($ignoretags,array_keys($ignoretags),$text);
 305         }
 306     }
 307 }
 308
 309 /**
 310  * Extract tags (any text enclosed by < and > to avoid being processed by filters.
 311  * It returns the text converted with some <%xEXCL_SEPARATORx%> codes replacing the extracted text. Such extracted
 312  * texts are returned in the tags array (as values), with codes as keys.
 313  *
 314  * param  text   the text that we are filtering (in/out)
 315  * param  tags   an array of saved strings useful to rebuild the original text (in/out)
 316  **/
 317 function filter_save_tags(&$text,&$tags) {
 318
 319     preg_match_all('/<([^#%*].*?)>/is',$text,$list_of_newtags);
 320     foreach (array_unique($list_of_newtags[0]) as $ntkey=>$value) {
 321         $prefix = (string)(count($tags) + 1);
 322         $tags['<%'.$prefix.EXCL_SEPARATOR.$ntkey.'%>'] = $value;
 323     }
 324     if (!empty($tags)) {
 325         $text = str_replace($tags,array_keys($tags),$text);
 326     }
 327 }
 328
 329 /**
 330  * Add missing openpopup javascript to HTML files.
 331  */
 332 function filter_add_javascript($text) {
 333     global $CFG;
 334
 335     if (stripos($text, '</html>') === FALSE) {
 336         return $text; // this is not a html file
 337     }
 338     if (strpos($text, 'onclick="return openpopup') === FALSE) {
 339         return $text; // no popup - no need to add javascript
 340     }
 341     $js ="
 342     <script type=\"text/javascript\">
 343     <!--
 344         function openpopup(url,name,options,fullscreen) {
 345           fullurl = \"".$CFG->httpswwwroot."\" + url;
 346           windowobj = window.open(fullurl,name,options);
 347           if (fullscreen) {
 348             windowobj.moveTo(0,0);
 349             windowobj.resizeTo(screen.availWidth,screen.availHeight);
 350           }
 351           windowobj.focus();
 352           return false;
 353         }
 354     // -->
 355     </script>";
 356     if (stripos($text, '</head>') !== FALSE) {
 357         //try to add it into the head element
 358         $text = str_ireplace('</head>', $js.'</head>', $text);
 359         return $text;
 360     }
 361
 362     //last chance - try adding head element
 363     return preg_replace("/<html.*?>/is", "\\0<head>".$js.'</head>', $text);
 364 }
 365 ?>