3 // This file is part of Moodle - http://moodle.org/
5 // Moodle is free software: you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation, either version 3 of the License, or
8 // (at your option) any later version.
10 // Moodle is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
19 * This plugin is used to access files by providing a URL
22 * @package repository_url
23 * @copyright 2010 Dongsheng Cai {@link http://dongsheng.org}
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
26 require_once($CFG->dirroot
. '/repository/lib.php');
27 require_once(__DIR__
.'/locallib.php');
30 * repository_url class
31 * A subclass of repository, which is used to download a file from a specific url
34 * @package repository_url
35 * @copyright 2009 Dongsheng Cai {@link http://dongsheng.org}
36 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
38 class repository_url
extends repository
{
39 /** @var int Maximum number of times parse_file() may be asked again for a URL that differs only by its query string. */
40 const MAX_RECURSION_TIME
= 5;
41 /** @var int Maximum number of CSS imports that parse_file() will follow. */
42 const MAX_CSS_IMPORTS
= 10;
43 /** @var int Number of CSS imports followed so far; compared against MAX_CSS_IMPORTS. */
44 var $cssimportcounter = 0;
/** @var array Absolute URLs already handed to parse_file(); consulted to avoid processing the same URL again. */
45 var $processedfiles = array();
46 /** @var int Number of times a URL already seen (ignoring its query string) was requested again. */
47 var $recursioncounter = 0;
50 * @param int $repositoryid
51 * @param object $context
52 * @param array $options
// Builds the repository instance and captures the URL submitted through the 'file' request parameter.
54 public function __construct($repositoryid, $context = SYSCONTEXTID
, $options = array()){
56 parent
::__construct($repositoryid, $context, $options);
// Read the 'file' parameter as PARAM_RAW (default: empty string); get_listing() later cleans it with PARAM_URL.
57 $this->file_url
= optional_param('file', '', PARAM_RAW
);
// Percent-encode quotes, spaces and angle brackets via escape_url().
58 $this->file_url
= $this->escape_url($this->file_url
);
// Login check for this repository: the outcome depends on whether a file URL was supplied with the request.
61 public function check_login() {
62 if (!empty($this->file_url
)) {
// Produces the "login" form, which for this repository is a single text field named 'file' asking for the URL.
71 public function print_login() {
72 $strdownload = get_string('download', 'repository');
73 $strname = get_string('rename', 'repository_url');
74 $strurl = get_string('url', 'repository_url');
// AJAX mode: the form is described as a data structure ($ret['login']) rather than as markup.
75 if ($this->options
['ajax']) {
76 $url = new stdClass();
77 $url->label
= $strurl.': ';
82 $ret['login'] = array($url);
83 $ret['login_btn_label'] = get_string('download', 'repository_url');
84 $ret['allowcaching'] = true; // Indicates that the login form can be cached in filepicker.js.
90 <td>{$strurl}: </td><td><input name="file" type="text" /></td>
93 <input type="submit" value="{$strdownload}" />
101 * @param string $page
// Builds the listing for the URL captured in the constructor; the found entries end up in $ret['list'].
104 public function get_listing($path='', $page='') {
106 $ret['list'] = array();
// Flags for the consumer of the listing; presumably they hide the search, refresh and login controls - TODO confirm.
107 $ret['nosearch'] = true;
108 $ret['norefresh'] = true;
109 $ret['nologin'] = true;
// Clean the user-supplied value as a URL; a value that is empty after cleaning is rejected with an exception.
111 $this->file_url
= clean_param($this->file_url
, PARAM_URL
);
112 if (empty($this->file_url
)) {
113 throw new repository_exception('validfiletype', 'repository_url');
// Parse the URL as the main file: no base URL, $mainfile = true, results written into $ret.
116 $this->parse_file(null, $this->file_url
, $ret, true);
121 * Parses one file (either html or css)
123 * @param string $baseurl (optional) URL of the file where link to this file was found
124 * @param string $relativeurl relative or absolute link to the file
126 * @param bool $mainfile true only for the main HTML file, false for all embedded/linked files
// Fetches one file (HTML page, stylesheet or image) and records every image it leads to in $list.
// HTML is only parsed for the main file; linked and imported stylesheets are handled by recursive calls.
128 protected function parse_file($baseurl, $relativeurl, &$list, $mainfile = false) {
// If the link contains a quoted part, keep only the text between the quotes.
129 if (preg_match('/([\'"])(.*)\1/', $relativeurl, $matches)) {
130 $relativeurl = $matches[2];
132 if (empty($baseurl)) {
// Resolve the link against the base URL, then turn HTML entities (e.g. &amp;) back into characters.
135 $url = htmlspecialchars_decode(url_to_absolute($baseurl, $relativeurl));
137 if (in_array($url, $this->processedfiles
)) {
138 // Avoid endless recursion for the same URL with same parameters.
141 // Remove the query string and anchors before check.
142 $recursioncheckurl = (new moodle_url($url))->out_omit_querystring();
143 if (in_array($recursioncheckurl, $this->processedfiles
)) {
144 $this->recursioncounter++
;
146 if ($this->recursioncounter
>= self
::MAX_RECURSION_TIME
) {
147 // Avoid endless recursion for the same URL with different parameters.
// Remember this URL so that a later request for it is recognised by the checks above.
150 $this->processedfiles
[] = $url;
// Follow redirects, but no more than three of them.
152 $curl->setopt(array('CURLOPT_FOLLOWLOCATION' => true, 'CURLOPT_MAXREDIRS' => 3));
// Probe the URL with head() first; the status code and content type decide how it is handled.
153 $msg = $curl->head($url);
154 $info = $curl->get_info();
// Any status other than 200 is reported to the caller through $list['error'].
155 if ($info['http_code'] != 200) {
157 $list['error'] = $msg;
// Main file served as HTML (or with no content type at all): scan the markup.
161 if ($mainfile && (strstr($info['content_type'], 'text/html') ||
empty($info['content_type']))) {
163 $htmlcontent = $curl->get($info['url']);
164 $ddoc = new DOMDocument();
// The @ silences the warnings loadHTML() emits for malformed markup.
165 @$ddoc->loadHTML($htmlcontent);
// Every <img src> is added, resolved against $info['url'] (presumably the effective URL after redirects).
167 $tags = $ddoc->getElementsByTagName('img');
168 foreach ($tags as $tag) {
169 $url = $tag->getAttribute('src');
170 $this->add_image_to_list($info['url'], $url, $list);
172 // analyse embedded css (<style>)
173 $tags = $ddoc->getElementsByTagName('style');
174 foreach ($tags as $tag) {
// Only <style> elements that declare type="text/css" are collected.
175 if ($tag->getAttribute('type') == 'text/css') {
176 $csstoanalyze .= $tag->textContent
."\n";
179 // analyse links to css (<link type='text/css' href='...'>)
180 $tags = $ddoc->getElementsByTagName('link');
181 foreach ($tags as $tag) {
182 if ($tag->getAttribute('type') == 'text/css' && strlen($tag->getAttribute('href'))) {
183 $this->parse_file($info['url'], $tag->getAttribute('href'), $list);
// A stylesheet: download it and queue its text for the CSS scan further down.
186 } else if (strstr($info['content_type'], 'css')) {
188 $csscontent = $curl->get($info['url']);
189 $csstoanalyze .= $csscontent."\n";
190 } else if (strstr($info['content_type'], 'image/')) {
191 // A direct link to an image: add the image itself to the list.
192 $this->add_image_to_list($info['url'], $info['url'], $list);
194 $list['error'] = get_string('validfiletype', 'repository_url');
197 // parse all found css styles
198 if (strlen($csstoanalyze)) {
199 $urls = extract_css_urls($csstoanalyze);
// URLs that extract_css_urls() reports under 'property' are added as images.
200 if (!empty($urls['property'])) {
201 foreach ($urls['property'] as $url) {
202 $this->add_image_to_list($info['url'], $url, $list);
// URLs reported under 'import' are parsed recursively, bounded by MAX_CSS_IMPORTS.
205 if (!empty($urls['import'])) {
206 foreach ($urls['import'] as $cssurl) {
207 // Limit the number of CSS imports to avoid infinite imports.
208 if ($this->cssimportcounter
>= self
::MAX_CSS_IMPORTS
) {
211 $this->cssimportcounter++
;
212 $this->parse_file($info['url'], $cssurl, $list);
// Appends an image entry to $list['list'], creating that array first when it is empty or missing.
218 protected function add_image_to_list($baseurl, $url, &$list) {
219 if (empty($list['list'])) {
220 $list['list'] = array();
// Decode HTML entities in the link and resolve it against the base URL.
222 $src = url_to_absolute($baseurl, htmlspecialchars_decode($url));
// Look for an entry that already has this source URL - presumably so duplicates are not added twice.
223 foreach ($list['list'] as $image) {
224 if ($image['source'] == $src) {
// The title is guessed from the link as given; thumbnails are fixed at 84x84.
228 $list['list'][] = array(
229 'title'=>$this->guess_filename($url, ''),
232 'thumbnail_height'=>84,
233 'thumbnail_width'=>84
// Tries to derive a file name from the tail of $url.
236 public function guess_filename($url, $type) {
// Captures the run of word characters, question marks, hyphens and dots between a '/' and the end of the URL.
237 $pattern = '#\/([\w_\?\-.]+)$#';
239 preg_match($pattern, $url, $matches);
// No usable capture means no name could be read from the URL.
240 if (empty($matches[1])) {
248 * Escapes a url by percent-encoding double quotes, single quotes, spaces and angle brackets.
250 * Note: In general moodle does not automatically escape urls, but to make this plugin more user friendly
251 * and consistent with some other areas in moodle (such as mod_url), urls will automatically be escaped.
253 * If moodle_url or PARAM_URL is changed to clean characters that need to be escaped, then this function can be removed
255 * @param string $url An unescaped url.
256 * @return string The escaped url
// Percent-encodes exactly five characters in $url; everything else is left untouched.
258 protected function escape_url($url) {
// Double quote.
259 $url = str_replace('"', '%22', $url);
// Single quote.
260 $url = str_replace('\'', '%27', $url);
// Space.
261 $url = str_replace(' ', '%20', $url);
// Angle brackets.
262 $url = str_replace('<', '%3C', $url);
263 $url = str_replace('>', '%3E', $url);
// Declares the supported return types as a bitmask combining FILE_INTERNAL and FILE_EXTERNAL.
267 public function supported_returntypes() {
268 return (FILE_INTERNAL | FILE_EXTERNAL
);
272 * Return the source information
274 * @param stdClass $url
275 * @return string|null
277 public function get_file_source_info($url) {
282 * File types supported by the URL downloader plugin
// Only the 'web_image' type is declared as supported.
286 public function supported_filetypes() {
287 return array('web_image');
291 * Is this repository accessing private data?
295 public function contains_private_data() {