Merge branch 'MDL-76023' of https://github.com/paulholden/moodle
[moodle.git] / repository / url / lib.php
blob7b548e6be63aa366ac9b04ea97c60b9b8cddad71
1 <?php
3 // This file is part of Moodle - http://moodle.org/
4 //
5 // Moodle is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // Moodle is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
18 /**
19 * This plugin is used to access files by providing an url
21 * @since Moodle 2.0
22 * @package repository_url
23 * @copyright 2010 Dongsheng Cai {@link http://dongsheng.org}
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
26 require_once($CFG->dirroot . '/repository/lib.php');
27 require_once(__DIR__.'/locallib.php');
29 /**
30 * repository_url class
31 * A subclass of repository, which is used to download a file from a specific url
33 * @since Moodle 2.0
34 * @package repository_url
35 * @copyright 2009 Dongsheng Cai {@link http://dongsheng.org}
36 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
38 class repository_url extends repository {
/** @var int Maximum time of recursion. */
const MAX_RECURSION_TIME = 5;

/** @var string[] URLs already handed to parse_file(); consulted to avoid fetching the same URL twice. */
public $processedfiles = array();

/** @var int Recursion counter. */
public $recursioncounter = 0;

/**
 * @var string URL taken from the 'file' request parameter and escaped in the constructor.
 *             Declared explicitly so that assigning it does not create a dynamic property.
 */
public $file_url;
/**
 * Sets up the repository and captures the URL submitted with the request.
 *
 * The raw 'file' request parameter is read and passed through escape_url()
 * before being stored in $this->file_url.
 *
 * @param int $repositoryid
 * @param int|object $context forwarded to the parent constructor; defaults to SYSCONTEXTID
 * @param array $options
 */
public function __construct($repositoryid, $context = SYSCONTEXTID, $options = array()) {
    parent::__construct($repositoryid, $context, $options);
    $this->file_url = $this->escape_url(optional_param('file', '', PARAM_RAW));
}
/**
 * Tells whether a URL has been supplied for this request.
 *
 * @return bool true when $this->file_url is non-empty, false otherwise
 */
public function check_login() {
    return !empty($this->file_url);
}
/**
 * Provides the "login" form, which for this plugin is a single URL text input.
 *
 * @return array|null the form definition when $this->options['ajax'] is set; otherwise the
 *                    HTML form is echoed and nothing is returned
 */
public function print_login() {
    $strurl = get_string('url', 'repository_url');

    if ($this->options['ajax']) {
        $url = new stdClass();
        $url->label = $strurl.': ';
        $url->id = 'fileurl';
        $url->type = 'text';
        $url->name = 'file';

        $ret = array();
        $ret['login'] = array($url);
        $ret['login_btn_label'] = get_string('download', 'repository_url');
        $ret['allowcaching'] = true; // Indicates that login form can be cached in filepicker.js.
        return $ret;
    }

    // The submit button label is only needed for the plain HTML form.
    $strdownload = get_string('download', 'repository');
    echo <<<EOD
<table>
<tr>
<td>{$strurl}: </td><td><input name="file" type="text" /></td>
</tr>
</table>
<input type="submit" value="{$strdownload}" />
EOD;
}
/**
 * Builds the listing for the URL stored in $this->file_url.
 *
 * The URL is cleaned with PARAM_URL first; the listing itself is filled in by parse_file().
 *
 * @param mixed $path not read by this implementation
 * @param string $page not read by this implementation
 * @return array listing with the 'list', 'nosearch', 'norefresh' and 'nologin' keys, plus
 *               whatever parse_file() adds
 * @throws repository_exception when the URL is empty after cleaning
 */
public function get_listing($path = '', $page = '') {
    $this->file_url = clean_param($this->file_url, PARAM_URL);
    if (empty($this->file_url)) {
        throw new repository_exception('validfiletype', 'repository_url');
    }

    $listing = array(
        'list' => array(),
        'nosearch' => true,
        'norefresh' => true,
        'nologin' => true,
    );
    $this->parse_file(null, $this->file_url, $listing, true);

    return $listing;
}
/**
 * Parses one file (either html or css)
 *
 * The file is probed with a HEAD request and then handled according to its content type:
 * the main HTML page is scanned for <img>, <style> and <link> tags, CSS is scanned for
 * url() references and @import rules, and an image is added to the listing directly.
 *
 * @param string $baseurl (optional) URL of the file where link to this file was found
 * @param string $relativeurl relative or absolute link to the file
 * @param array $list listing under construction; images go to $list['list'] and
 *                    failures are reported in $list['error']
 * @param bool $mainfile true only for the main HTML file and false for all embedded/linked files
 */
protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
    // When the link is wrapped in matching quotes keep only the part between them.
    if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
        $relativeurl = $matches[2];
    }
    if (empty($baseurl)) {
        $url = $relativeurl;
    } else {
        $url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl));
    }
    if (in_array($url, $this->processedfiles, true)) {
        // Avoid endless recursion for the same URL with same parameters.
        return;
    }
    // Remove the query string before check.
    $recursioncheckurl = preg_replace('/\?.*/', '', $url);
    if (in_array($recursioncheckurl, $this->processedfiles, true)) {
        $this->recursioncounter++;
    }
    if ($this->recursioncounter >= self::MAX_RECURSION_TIME) {
        // Avoid endless recursion for the same URL with different parameters.
        return;
    }
    $this->processedfiles[] = $url;

    $curl = new curl;
    $curl->setopt(array('CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3));
    $msg = $curl->head($url);
    $info = $curl->get_info();
    if ($info['http_code'] != 200) {
        // Only a failure of the main file is reported; broken embedded files are skipped silently.
        if ($mainfile) {
            $list['error'] = $msg;
        }
        return;
    }

    // The content type may be absent; normalise it so the string functions never receive null.
    $contenttype = empty($info['content_type']) ? '' : (string)$info['content_type'];

    $csstoanalyze = '';
    if ($mainfile && ($contenttype === '' || strstr($contenttype, 'text/html'))) {
        // Parse as html.
        $htmlcontent = $curl->get($info['url']);
        // DOMDocument::loadHTML() rejects an empty source, so there is nothing to scan in that case.
        if (is_string($htmlcontent) && $htmlcontent !== '') {
            $ddoc = new DOMDocument();
            // The @ keeps libxml warnings about malformed markup out of the output.
            @$ddoc->loadHTML($htmlcontent);
            // Extract <img>.
            foreach ($ddoc->getElementsByTagName('img') as $tag) {
                $this->add_image_to_list($info['url'], $tag->getAttribute('src'), $list);
            }
            // Analyse embedded css (<style>).
            foreach ($ddoc->getElementsByTagName('style') as $tag) {
                if ($tag->getAttribute('type') == 'text/css') {
                    $csstoanalyze .= $tag->textContent."\n";
                }
            }
            // Analyse links to css (<link type='text/css' href='...'>).
            foreach ($ddoc->getElementsByTagName('link') as $tag) {
                $href = $tag->getAttribute('href');
                if ($tag->getAttribute('type') == 'text/css' && strlen($href)) {
                    $this->parse_file($info['url'], $href, $list);
                }
            }
        }
    } else if (strstr($contenttype, 'css')) {
        // Parse as css.
        $csstoanalyze .= $curl->get($info['url'])."\n";
    } else if (strstr($contenttype, 'image/')) {
        // Download this file.
        $this->add_image_to_list($info['url'], $info['url'], $list);
    } else {
        $list['error'] = get_string('validfiletype', 'repository_url');
    }

    // Parse all found css styles.
    if (strlen($csstoanalyze)) {
        $urls = extract_css_urls($csstoanalyze);
        if (!empty($urls['property'])) {
            foreach ($urls['property'] as $imageurl) {
                $this->add_image_to_list($info['url'], $imageurl, $list);
            }
        }
        if (!empty($urls['import'])) {
            foreach ($urls['import'] as $cssurl) {
                $this->parse_file($info['url'], $cssurl, $list);
            }
        }
    }
}
/**
 * Appends an image to the listing unless an entry with the same source is already there.
 *
 * @param string $baseurl URL the image link is resolved against
 * @param string $url link to the image as found in the parsed file
 * @param array $list listing under construction; the entry is appended to $list['list']
 */
protected function add_image_to_list($baseurl, $url, &$list) {
    if (empty($list['list'])) {
        $list['list'] = array();
    }

    $absolute = url_to_absolute($baseurl, htmlspecialchars_decode($url));
    foreach ($list['list'] as $existing) {
        if ($existing['source'] == $absolute) {
            // Already listed.
            return;
        }
    }

    $entry = array(
        'title' => $this->guess_filename($url, ''),
        'source' => $absolute,
        'thumbnail' => $absolute,
        'thumbnail_height' => 84,
        'thumbnail_width' => 84
    );
    $list['list'][] = $entry;
}
/**
 * Derives a file name from the last path segment of a URL.
 *
 * @param string $url URL to inspect
 * @param string $type not read by this implementation
 * @return string the trailing segment when it consists only of word characters, '?', '-'
 *                and '.'; otherwise the URL itself
 */
public function guess_filename($url, $type) {
    $matches = null;
    preg_match('#\/([\w_\?\-.]+)$#', $url, $matches);

    return empty($matches[1]) ? $url : $matches[1];
}
/**
 * Escapes a url by percent-encoding double quotes, single quotes, spaces and angle brackets.
 *
 * Note: In general moodle does not automatically escape urls, but for the purposes of making this plugin more user friendly
 * and make it consistent with some other areas in moodle (such as mod_url), urls will automatically be escaped.
 *
 * If moodle_url or PARAM_URL is changed to clean characters that need to be escaped, then this function can be removed
 *
 * @param string $url An unescaped url.
 * @return string The escaped url
 */
protected function escape_url($url) {
    // Replacements are applied in array order, one search character at a time.
    $unsafe = array('"', '\'', ' ', '<', '>');
    $encoded = array('%22', '%27', '%20', '%3C', '%3E');

    return str_replace($unsafe, $encoded, $url);
}
/**
 * Declares the return types this repository supports.
 *
 * @return int bitwise OR of FILE_INTERNAL and FILE_EXTERNAL
 */
public function supported_returntypes() {
    return FILE_INTERNAL | FILE_EXTERNAL;
}
/**
 * Return the source information
 *
 * The argument is handed back untouched.
 *
 * @param mixed $url source value; NOTE(review): previously annotated as stdClass while the
 *                   return was annotated as string|null - confirm the real type with callers
 * @return mixed the value that was passed in
 */
public function get_file_source_info($url) {
    return $url;
}
/**
 * File types supported by the url downloader plugin.
 *
 * @return array list containing the single entry 'web_image'
 */
public function supported_filetypes() {
    return array('web_image');
}
/**
 * Is this repository accessing private data?
 *
 * @return bool always false for this plugin
 */
public function contains_private_data() {
    return false;
}