weekly release 4.5dev
[moodle.git] / repository / url / lib.php
blob6b38f3b7fe66e7a3b624a212fef4872e2ff9bc0f
1 <?php
3 // This file is part of Moodle - http://moodle.org/
4 //
5 // Moodle is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // Moodle is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * This plugin is used to access files by providing a URL.
 *
 * @since Moodle 2.0
 * @package    repository_url
 * @copyright  2010 Dongsheng Cai {@link http://dongsheng.org}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
26 require_once($CFG->dirroot . '/repository/lib.php');
27 require_once(__DIR__.'/locallib.php');
/**
 * repository_url class
 *
 * A subclass of repository, which is used to download a file from a specific URL.
 * Given the URL of an image, a CSS file or an HTML page, it builds a listing of the
 * images that can be reached from it.
 *
 * @since Moodle 2.0
 * @package    repository_url
 * @copyright  2009 Dongsheng Cai {@link http://dongsheng.org}
 * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class repository_url extends repository {
    /** @var int Maximum time of recursion. */
    const MAX_RECURSION_TIME = 5;

    /** @var int Maximum number of CSS imports. */
    protected const MAX_CSS_IMPORTS = 10;

    /** @var int CSS import counter. */
    protected int $cssimportcounter = 0;

    /** @var array Absolute URLs that parse_file() has already started processing. */
    public $processedfiles = [];

    /** @var int Recursion counter. */
    public $recursioncounter = 0;

    /** @var string file URL. */
    public $file_url;

    /**
     * Constructor: reads the 'file' request parameter and stores its escaped form in $file_url.
     *
     * @param int $repositoryid
     * @param object $context
     * @param array $options
     */
    public function __construct($repositoryid, $context = SYSCONTEXTID, $options = []) {
        parent::__construct($repositoryid, $context, $options);
        $this->file_url = $this->escape_url(optional_param('file', '', PARAM_RAW));
    }

    /**
     * Reports whether a file URL has been supplied.
     *
     * @return bool true when $file_url is non-empty, false otherwise
     */
    public function check_login() {
        return !empty($this->file_url);
    }

    /**
     * Describes (ajax mode) or prints (non-ajax mode) the form asking for the URL.
     *
     * @return mixed array describing the form when $this->options['ajax'] is truthy;
     *               otherwise the HTML form is echoed and nothing is returned
     */
    public function print_login() {
        $strurl = get_string('url', 'repository_url');

        if ($this->options['ajax']) {
            $url = new stdClass();
            $url->label = $strurl.': ';
            $url->id = 'fileurl';
            $url->type = 'text';
            $url->name = 'file';

            $ret = [];
            $ret['login'] = [$url];
            $ret['login_btn_label'] = get_string('download', 'repository_url');
            // Indicates that login form can be cached in filepicker.js.
            $ret['allowcaching'] = true;
            return $ret;
        }

        $strdownload = get_string('download', 'repository');
        echo <<<EOD
<table>
<tr>
<td>{$strurl}: </td><td><input name="file" type="text" /></td>
</tr>
</table>
<input type="submit" value="{$strdownload}" />
EOD;
    }

    /**
     * Builds the listing of images found at the URL stored in $file_url.
     *
     * @param mixed $path not used by this method
     * @param string $page not used by this method
     * @return array listing with 'list', 'nosearch', 'norefresh' and 'nologin' keys,
     *               plus 'error' when parse_file() reported a problem
     * @throws repository_exception when the URL is empty after PARAM_URL cleaning
     */
    public function get_listing($path = '', $page = '') {
        $ret = [
            'list' => [],
            'nosearch' => true,
            'norefresh' => true,
            'nologin' => true,
        ];

        $this->file_url = clean_param($this->file_url, PARAM_URL);
        if (empty($this->file_url)) {
            throw new repository_exception('validfiletype', 'repository_url');
        }

        $this->parse_file(null, $this->file_url, $ret, true);
        return $ret;
    }

    /**
     * Parses one file (either html or css)
     *
     * Images found in the file are appended to $list['list']; linked and imported
     * stylesheets are parsed recursively.
     *
     * @param string $baseurl (optional) URL of the file where link to this file was found
     * @param string $relativeurl relative or absolute link to the file
     * @param array $list listing being built, modified in place
     * @param bool $mainfile true only for the main file requested by the user,
     *                       false for all embedded/linked files
     */
    protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
        // Strip surrounding quotes, e.g. from CSS url("...") values.
        if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
            $relativeurl = $matches[2];
        }
        if (empty($baseurl)) {
            $url = $relativeurl;
        } else {
            $url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl), ENT_COMPAT);
        }
        if (in_array($url, $this->processedfiles, true)) {
            // Avoid endless recursion for the same URL with same parameters.
            return;
        }
        // Remove the query string and anchors before check.
        $recursioncheckurl = (new moodle_url($url))->out_omit_querystring();
        if (in_array($recursioncheckurl, $this->processedfiles, true)) {
            $this->recursioncounter++;
        }
        if ($this->recursioncounter >= self::MAX_RECURSION_TIME) {
            // Avoid endless recursion for the same URL with different parameters.
            return;
        }
        $this->processedfiles[] = $url;

        $curl = new curl;
        $curl->setopt(['CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3]);
        $msg = $curl->head($url);
        $info = $curl->get_info();
        if ($info['http_code'] != 200) {
            // Only a failure of the main file is reported; broken embedded files are skipped.
            if ($mainfile) {
                $list['error'] = $msg;
            }
            return;
        }

        // The content type may be missing (null) when the server sends no Content-Type
        // header; normalise it once so the string functions below never receive null.
        $contenttype = (string) ($info['content_type'] ?? '');

        $csstoanalyze = '';
        if ($mainfile && (empty($contenttype) || strpos($contenttype, 'text/html') !== false)) {
            // Parse as html.
            $htmlcontent = $curl->get($info['url']);
            // DOMDocument::loadHTML() throws a ValueError on an empty string in PHP 8+,
            // which the error-suppression operator cannot silence, so skip empty bodies.
            if (is_string($htmlcontent) && $htmlcontent !== '') {
                $ddoc = new DOMDocument();
                // Real-world HTML is frequently malformed; suppress the parser warnings.
                @$ddoc->loadHTML($htmlcontent);
                // Extract <img>.
                foreach ($ddoc->getElementsByTagName('img') as $tag) {
                    $this->add_image_to_list($info['url'], $tag->getAttribute('src'), $list);
                }
                // Analyse embedded css (<style>).
                foreach ($ddoc->getElementsByTagName('style') as $tag) {
                    if ($tag->getAttribute('type') == 'text/css') {
                        $csstoanalyze .= $tag->textContent."\n";
                    }
                }
                // Analyse links to css (<link type='text/css' href='...'>).
                foreach ($ddoc->getElementsByTagName('link') as $tag) {
                    if ($tag->getAttribute('type') == 'text/css' && strlen($tag->getAttribute('href'))) {
                        $this->parse_file($info['url'], $tag->getAttribute('href'), $list);
                    }
                }
            }
        } else if (strpos($contenttype, 'css') !== false) {
            // Parse as css.
            $csstoanalyze .= $curl->get($info['url'])."\n";
        } else if (strpos($contenttype, 'image/') !== false) {
            // Download this file.
            $this->add_image_to_list($info['url'], $info['url'], $list);
        } else {
            $list['error'] = get_string('validfiletype', 'repository_url');
        }

        // Parse all found css styles.
        if (!strlen($csstoanalyze)) {
            return;
        }
        $urls = extract_css_urls($csstoanalyze);
        if (!empty($urls['property'])) {
            foreach ($urls['property'] as $propertyurl) {
                $this->add_image_to_list($info['url'], $propertyurl, $list);
            }
        }
        if (!empty($urls['import'])) {
            foreach ($urls['import'] as $cssurl) {
                // Limit the number of CSS imports to avoid infinite imports.
                if ($this->cssimportcounter >= self::MAX_CSS_IMPORTS) {
                    return;
                }
                $this->cssimportcounter++;
                $this->parse_file($info['url'], $cssurl, $list);
            }
        }
    }

    /**
     * Appends an image entry to $list['list'] unless an entry with the same source exists.
     *
     * @param string $baseurl URL of the file in which the image reference was found
     * @param string $url relative or absolute (possibly HTML-escaped) link to the image
     * @param array $list listing being built, modified in place
     */
    protected function add_image_to_list($baseurl, $url, &$list) {
        if (empty($list['list'])) {
            $list['list'] = [];
        }
        $src = url_to_absolute($baseurl, htmlspecialchars_decode($url, ENT_COMPAT));
        foreach ($list['list'] as $image) {
            if ($image['source'] === $src) {
                // Already listed.
                return;
            }
        }
        $list['list'][] = [
            'title' => $this->guess_filename($url, ''),
            'source' => $src,
            'thumbnail' => $src,
            'thumbnail_height' => 84,
            'thumbnail_width' => 84,
        ];
    }

    /**
     * Guesses a file name from the last path segment of a URL.
     *
     * @param string $url URL to inspect
     * @param string $type not used by this method
     * @return string the trailing segment when it consists only of word characters,
     *                '?', '-' and '.'; otherwise the URL itself
     */
    public function guess_filename($url, $type) {
        $matches = null;
        preg_match('#\/([\w_\?\-.]+)$#', $url, $matches);
        // Note: empty() also rejects a segment of "0", in which case the URL is returned.
        if (empty($matches[1])) {
            return $url;
        }
        return $matches[1];
    }

    /**
     * Escapes a url by percent-encoding double quotes, single quotes, spaces, '<' and '>'.
     *
     * Note: In general moodle does not automatically escape urls, but for the purposes of making this plugin more user friendly
     * and make it consistent with some other areas in moodle (such as mod_url), urls will automatically be escaped.
     *
     * If moodle_url or PARAM_URL is changed to clean characters that need to be escaped, then this function can be removed
     *
     * @param string $url An unescaped url.
     * @return string The escaped url
     */
    protected function escape_url($url) {
        // None of the replacements contains a searched character, so one pass is
        // equivalent to replacing the characters one after another.
        return str_replace(
            ['"', '\'', ' ', '<', '>'],
            ['%22', '%27', '%20', '%3C', '%3E'],
            $url
        );
    }

    /**
     * Return types supported by this repository.
     *
     * @return int bitwise OR of FILE_INTERNAL and FILE_EXTERNAL
     */
    public function supported_returntypes() {
        return (FILE_INTERNAL | FILE_EXTERNAL);
    }

    /**
     * Return the source information
     *
     * @param mixed $url source value; it is returned unchanged
     * @return string|null
     */
    public function get_file_source_info($url) {
        return $url;
    }

    /**
     * file types supported by url downloader plugin
     *
     * @return array
     */
    public function supported_filetypes() {
        return ['web_image'];
    }

    /**
     * Is this repository accessing private data?
     *
     * @return bool
     */
    public function contains_private_data() {
        return false;
    }
}