MDL-51177 atto_html: Import htmlplus functionality
[moodle.git] / backup / cc / entities.class.php
blob4a8ef064e9f436d70e409c5f351a77dffd25b7b0
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * @package moodlecore
 * @subpackage backup-imscc
 * @copyright 2009 Mauro Rondinelli (mauro.rondinelli [AT] uvcms.com)
 * @copyright 2011 Darko Miletic (dmiletic@moodlerooms.com)
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
24 defined('MOODLE_INTERNAL') or die('Direct access to this script is forbidden.');
26 class entities {
/**
 * Normalises a string so that it can be embedded in XML.
 *
 * Any HTML entities in the input (both quote styles included) are decoded
 * first; the decoded text is then escaped again with quotes left untouched
 * and without double-encoding entities that are still present.
 *
 * @param string $value
 * @return string
 */
public static function safexml($value) {
    $decoded = html_entity_decode($value, ENT_QUOTES, 'UTF-8');

    return htmlspecialchars($decoded, ENT_NOQUOTES, 'UTF-8', false);
}
/**
 * Converts an HTML fragment or document to UTF-8 and reduces it to the
 * markup found inside its body (or html) element when one is present.
 *
 * @param string $content raw HTML
 * @return string empty string when $content is empty, otherwise the converted markup
 */
protected function prepare_content($content) {
    if (empty($content)) {
        return '';
    }

    // Created before parsing and kept until the method returns.
    // NOTE(review): presumably collects libxml errors for its lifetime - confirm in libxml_errors_mgr.
    $xml_error = new libxml_errors_mgr();

    $parser = new DOMDocument();
    $parser->validateOnParse = false;
    $parser->strictErrorChecking = false;

    // Prefer the encoding reported by the parser, fall back to detection.
    $encoding = $parser->loadHTML($content) ? $parser->xmlEncoding : null;
    if (empty($encoding)) {
        $encoding = mb_detect_encoding($content, 'auto', true);
    }

    // Only convert when an encoding is known and the input is not valid UTF-8 already.
    $result = $content;
    if (!empty($encoding) && !mb_check_encoding($content, 'UTF-8')) {
        $result = mb_convert_encoding($content, 'UTF-8', $encoding);
    }

    // Keep only what sits inside <body>; if there is none, try <html>.
    $template = "/<##[^>]*>(.+)<\/##>/is";
    foreach (array('body', 'html') as $tagname) {
        $captured = array();
        if (preg_match(str_replace('##', $tagname, $template), $result, $captured)) {
            $result = $captured[1];
            break;
        }
    }

    return $result;
}
/**
 * Loads an XML file into a new DOMDocument and logs the attempt.
 *
 * A failed load is logged as well; the (then unloaded) document object is
 * still returned to the caller.
 *
 * @param string $path_to_file
 * @return DOMDocument
 */
public function load_xml_resource($path_to_file) {
    cc2moodle::log_action('Load the XML resource file: '.$path_to_file);

    $document = new DOMDocument();
    $loaded = $document->load($path_to_file);

    if (!$loaded) {
        cc2moodle::log_action('Cannot load the XML resource file: ' . $path_to_file, true);
    }

    return $document;
}
/**
 * Rewrites the src of every img and the href of every a element that has
 * no URL scheme.
 *
 * For such values the $IMS-CC-FILEBASE$ token is removed, the value is
 * resolved against $root_path with full_path() and the result is prefixed
 * with the $@FILEPHP@$ placeholder. Values that carry a scheme are written
 * back unchanged.
 *
 * @param string $html
 * @param string $root_path
 * @return string markup found inside the body of the parsed document
 */
public function update_sources($html, $root_path = '') {
    $document = $this->load_html($html);

    $targets = array('img' => 'src' , 'a' => 'href');

    foreach ($targets as $tagname => $attrname) {
        foreach ($document->getElementsByTagName($tagname) as $node) {
            $value = $node->getAttribute($attrname);
            $scheme = parse_url($value, PHP_URL_SCHEME);

            if (empty($scheme)) {
                $relative = str_replace('$IMS-CC-FILEBASE$', '', $value);
                $resolved = $this->full_path($root_path . '/' . $relative, '/');
                $value = '$@FILEPHP@$' . '/' . $resolved;
            }

            $node->setAttribute($attrname, $value);
        }
    }

    return $this->html_insidebody($document);
}
/**
 * Collapses "." and ".." segments of a path and returns it in relative form.
 *
 * The $IMS-CC-FILEBASE$ token is removed and surrounding whitespace is
 * trimmed before the path is processed. Segments are then read from right
 * to left:
 *  - "." is dropped;
 *  - ".." cancels the next segment to its left; ".." segments that have
 *    nothing left to cancel are silently discarded;
 *  - a leading separator and trailing separators are not kept.
 *
 * A path that contains no separator at all is returned as is (trimmed),
 * without any "." / ".." handling.
 *
 * @param string $path
 * @param string $dir_sep separator used both to split and to rebuild the path
 * @return string normalised path, empty string for empty or non-string input
 */
public function full_path($path, $dir_sep = DIRECTORY_SEPARATOR) {
    $path = str_replace('$IMS-CC-FILEBASE$', '', $path);

    // str_replace() hands arrays back as arrays; there is nothing to normalise then.
    if (!is_string($path)) {
        return '';
    }

    // Trim once and use the trimmed value everywhere, so that offsets and
    // segments always refer to the same string.
    $path = trim($path);
    if ($path === '' || $dir_sep === '' || strpos($path, $dir_sep) === false) {
        return $path;
    }

    $segments = explode($dir_sep, $path);
    if ($segments[0] === '') {
        // A leading separator is not kept in the result.
        array_shift($segments);
    }

    $result = '';
    $pending_ups = 0;

    foreach (array_reverse($segments) as $segment) {
        if ($segment === '.') {
            continue;
        }
        if ($segment === '..') {
            $pending_ups++;
            continue;
        }
        if ($pending_ups > 0) {
            // This segment is cancelled by a ".." found to its right.
            $pending_ups--;
            continue;
        }
        $result = ($result === '') ? $segment : $segment . $dir_sep . $result;
    }

    return $result;
}
/**
 * Gives every img element an alt and a title attribute.
 *
 * When either attribute is empty it is filled with the file name (without
 * extension) taken from the image's src; existing values are kept.
 *
 * @param string $html
 * @return string markup found inside the body of the parsed document
 */
public function include_titles ($html) {
    $document = $this->load_html($html);

    foreach ($document->getElementsByTagName('img') as $img) {
        $fallback = pathinfo($img->getAttribute('src'), PATHINFO_FILENAME);

        $alt = $img->getAttribute('alt');
        if (empty($alt)) {
            $alt = $fallback;
        }

        $title = $img->getAttribute('title');
        if (empty($title)) {
            $title = $fallback;
        }

        $img->setAttribute('alt', $alt);
        $img->setAttribute('title', $title);
    }

    return $this->html_insidebody($document);
}
/**
 * Looks up the href of the first file declared for a manifest resource.
 *
 * @param string $identifier value of the resource's identifier attribute
 * @return string the href, or an empty string when the query fails or matches nothing
 */
public function get_external_xml ($identifier) {
    $xpath = cc2moodle::newx_path(cc2moodle::$manifest, cc2moodle::$namespaces);

    $files = $xpath->query('/imscc:manifest/imscc:resources/imscc:resource[@identifier="'.
        $identifier.'"]/imscc:file/@href');

    // query() yields false on failure and an (object) node list otherwise.
    // empty() is never true for an object, so the list length has to be
    // checked as well before item(0) is dereferenced.
    if (empty($files) || ($files->length < 1)) {
        return '';
    }

    return $files->item(0)->nodeValue;
}
/**
 * Copies files from the manifest folder into a destination folder.
 *
 * Each entry of $files is a path relative to both folders. Missing
 * destination directories are created. Entries whose source is not a
 * regular file are skipped without a warning; a failed copy is reported
 * through $OUTPUT and the log.
 *
 * @param array|string $files relative paths; nothing happens when empty
 * @param string $destination_folder
 * @return void
 */
public function move_files($files, $destination_folder) {
    global $CFG, $OUTPUT;

    if (empty($files)) {
        return;
    }

    foreach ($files as $relative) {
        $source = cc2moodle::$path_to_manifest_folder . DIRECTORY_SEPARATOR . $relative;
        $destination = $destination_folder . DIRECTORY_SEPARATOR . $relative;
        $targetdir = dirname($destination);

        cc2moodle::log_action('Copy the file: ' . $source . ' to ' . $destination);

        if (!file_exists($targetdir)) {
            mkdir($targetdir, $CFG->directorypermissions, true);
        }

        // Anything that is not a regular file counts as handled.
        $copied = !is_file($source) || @copy($source, $destination);

        if (!$copied) {
            echo $OUTPUT->notification('WARNING: Cannot copy the file ' . $source . ' to ' . $destination);
            cc2moodle::log_action('Cannot copy the file ' . $source . ' to ' . $destination, false);
        }
    }
}
/**
 * Collects the hrefs of all non-HTML files declared in the manifest for
 * the resource types listed in cc2moodle::$restypes.
 *
 * When the manifest contains third-level items that have a title but no
 * identifierref, files.gif is additionally copied from the Moodle pix
 * directory into the course_files folder and 'files.gif' is appended to
 * the list.
 *
 * @return array|string list of relative paths, or an empty string when there are none
 */
protected function get_all_files () {
    global $CFG;

    $xpath = cc2moodle::newx_path(cc2moodle::$manifest, cc2moodle::$namespaces);
    $skipped = array('html', 'htm', 'xhtml');
    $collected = array();

    foreach (cc2moodle::$restypes as $type) {
        $hrefs = $xpath->query('/imscc:manifest/imscc:resources/imscc:resource[@type="' . $type . '"]/imscc:file/@href');

        if (empty($hrefs) || ($hrefs->length <= 0)) {
            unset($hrefs);
            continue;
        }

        foreach ($hrefs as $href) {
            // Omit html files.
            $extension = strtolower(pathinfo($href->nodeValue, PATHINFO_EXTENSION));
            if (!in_array($extension, $skipped)) {
                $collected[] = $href->nodeValue;
            }
        }

        unset($hrefs);
    }

    // Are there any labels?
    $labels = $xpath->query("//imscc:item/imscc:item/imscc:item[imscc:title][not(@identifierref)]");
    if (!empty($labels) && ($labels->length > 0)) {
        $iconname = 'files.gif';
        $targetdir = cc2moodle::$path_to_manifest_folder . DIRECTORY_SEPARATOR . 'course_files';

        if (!file_exists($targetdir)) {
            mkdir($targetdir, $CFG->directorypermissions, true);
        }

        // Copy the files.gif icon next to the course files.
        copy("{$CFG->dirroot}/pix/i/files.gif", $targetdir . DIRECTORY_SEPARATOR . $iconname);
        $collected[] = $iconname;
    }

    if (empty($collected)) {
        return '';
    }

    return $collected;
}
/**
 * Copies every file returned by get_all_files() into the course_files
 * folder below the manifest folder.
 *
 * @return void
 */
public function move_all_files() {
    $files = $this->get_all_files();

    if (!empty($files)) {
        // move_files() takes exactly two parameters; the third argument
        // that used to be passed here was ignored and has been dropped.
        $this->move_files($files, cc2moodle::$path_to_manifest_folder . DIRECTORY_SEPARATOR . 'course_files');
    }
}
/**
 * Parses HTML into a DOMDocument, making sure a UTF-8 charset meta tag is present.
 *
 * Input that does not already contain the exact meta tag is wrapped in a
 * minimal html/head/body skeleton that carries it. Parser warnings are
 * suppressed.
 *
 * @param string $html
 * @return DOMDocument
 */
private function load_html($html) {
    // Need to make sure that the html passed has charset meta tag.
    $metatag = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';

    $markup = $html;
    if (strpos($markup, $metatag) === false) {
        $markup = '<html><head>' . $metatag . '</head><body>' . $html . '</body></html>';
    }

    $document = new DOMDocument();
    @$document->loadHTML($markup);

    return $document;
}
/**
 * Returns the canonical (C14N) serialisation of the first body element
 * with the literal <body> and </body> strings removed (case-insensitive).
 *
 * @param DOMDocument $domdocument
 * @return string empty string when the document has no body element
 */
private function html_insidebody($domdocument) {
    $bodies = $domdocument->getElementsByTagName('body');

    if ($bodies->length <= 0) {
        return '';
    }

    $serialised = $bodies->item(0)->C14N();

    return str_ireplace(array('<body>', '</body>'), '', $serialised);
}
/**
 * Builds a string of random ASCII letters (a-z, A-Z) using mt_rand().
 *
 * @param int $length number of letters wanted
 * @return string empty string when $length is not greater than zero
 */
public function generate_random_string ($length = 6) {
    $letters = str_split('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ', 1);
    $poolsize = count($letters);

    $response = '';
    for ($i = 1; $i <= $length; $i++) {
        // mt_rand() is 1-based here, the array is 0-based.
        $pick = mt_rand(1, $poolsize);
        $response .= $letters[$pick - 1];
    }

    return $response;
}
/**
 * Shortens a text to at most $max characters.
 *
 * Tags are removed first when requested, so that a cut can never land
 * inside a tag. Text that already fits is returned unchanged. Longer text
 * is cut character-wise (UTF-8 aware); when $max is greater than 10 the
 * cut leaves room for a trailing ' [...]' marker, which is appended.
 *
 * @param string $text
 * @param int $max maximum length of the returned text, in characters
 * @param bool $remove_html whether to strip HTML/PHP tags from the text
 * @return string
 */
public function truncate_text($text, $max, $remove_html) {
    $text = (string)$text;

    if ($remove_html) {
        $text = strip_tags($text);
    }

    // Nothing to shorten, so no marker either.
    if (mb_strlen($text, 'UTF-8') <= $max) {
        return $text;
    }

    if ($max > 10) {
        // Six characters are reserved for the ' [...]' marker.
        return mb_substr($text, 0, $max - 6, 'UTF-8') . ' [...]';
    }

    return mb_substr($text, 0, $max, 'UTF-8');
}