timeline: if a section is set to hidden and the user is not capable of editing a...
[moodle-blog-course-format.git] / lib / snoopy / Snoopy.class.inc
blob9ab7d8e1a268c5d18ddb7a1f9435b6be7727272c
1 <?php
3 /*************************************************
5 Snoopy - the PHP net client
6 Author: Monte Ohrt <monte@ispi.net>
7 Copyright (c): 1999-2008 New Digital Group, all rights reserved
8 Version: 1.2.4
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24 You may contact the author of Snoopy by e-mail at:
25 monte@ohrt.com
27 The latest version of Snoopy can be obtained from:
28 http://snoopy.sourceforge.net/
30 *************************************************/
32 class Snoopy
/**** Public variables ****/

    /* user definable vars */

    // host name we are connecting to
    var $host = "www.php.net";
    // port we are connecting to
    var $port = 80;
    // proxy host to use
    var $proxy_host = "";
    // proxy port to use
    var $proxy_port = "";
    // proxy user to use
    var $proxy_user = "";
    // proxy password to use
    var $proxy_pass = "";

    // agent we masquerade as
    var $agent = "Snoopy v1.2.4";
    // referer info to pass
    var $referer = "";
    // array of cookies to pass, e.g. $cookies["username"]="joe";
    var $cookies = array();
    // array of raw headers to send, e.g. $rawheaders["Content-type"]="text/html";
    var $rawheaders = array();

    // http redirection depth maximum. 0 = disallow
    var $maxredirs = 5;
    // contains address of last redirected address
    var $lastredirectaddr = "";
    // allows redirection off-site
    var $offsiteok = true;
    // frame content depth maximum. 0 = disallow
    var $maxframes = 0;
    // expand links to fully qualified URLs.
    // this only applies to fetchlinks(), submitlinks(), and submittext()
    var $expandlinks = true;
    // pass set cookies back through redirects
    // NOTE: this currently does not respect dates, domains or paths.
    var $passcookies = true;

    // user for http authentication
    var $user = "";
    // password for http authentication
    var $pass = "";

    // http accept types
    var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    // where the content is put
    var $results = "";

    // error messages sent here
    var $error = "";
    // response code returned from server
    var $response_code = "";
    // headers returned from server sent here
    var $headers = array();
    // max return data length (body)
    var $maxlength = 500000;
    // timeout on read operations, in seconds
    // supported only since PHP 4 Beta 4
    // set to 0 to disallow timeouts
    var $read_timeout = 0;
    // if a read operation timed out
    var $timed_out = false;
    // http request status
    var $status = 0;

    // temporary directory that the webserver has permission to write to.
    // under Windows, this should be C:\temp
    var $temp_dir = "/tmp";

    // Snoopy will use cURL for fetching SSL content if a full system path
    // to the cURL binary is supplied here. set to false if you do not have
    // cURL installed. See http://curl.haxx.se for details on installing cURL.
    // Snoopy does *not* use the cURL library functions built into php,
    // as these functions are not stable as of this Snoopy release.
    var $curl_path = "/usr/local/bin/curl";

    /**** Private variables ****/

    // max line length (headers)
    var $_maxlinelen = 4096;

    // default http request method
    var $_httpmethod = "GET";
    // default http request version
    var $_httpversion = "HTTP/1.0";
    // default submit method
    var $_submit_method = "POST";
    // default submit type
    var $_submit_type = "application/x-www-form-urlencoded";
    // MIME boundary for multipart/form-data submit type
    var $_mime_boundary = "";
    // will be set if page fetched is a redirect
    var $_redirectaddr = false;
    // increments on an http redirect
    var $_redirectdepth = 0;
    // frame src urls
    var $_frameurls = array();
    // increments on frame depth
    var $_framedepth = 0;

    // set if using a proxy server
    var $_isproxy = false;
    // timeout for socket connection
    var $_fp_timeout = 30;
/*======================================================================*\
    Function:    fetch
    Purpose:    fetch the contents of a web page over http or https
                (and possibly other protocols in the
                future like ftp, nntp, gopher, etc.), then follow a
                reported redirect (bounded by $maxredirs / $offsiteok)
                and fetch any collected frame URLs (bounded by $maxframes)
    Input:        $URI    the location of the page to fetch
    Output:        $this->results    the output text from the fetch
                returns false when the connection fails, when https is
                requested but $curl_path is unset or not executable, or
                when the scheme is missing/unsupported ($this->error is
                set in the last case); returns true otherwise
\*======================================================================*/

    function fetch($URI)
    {
        $URI_PARTS = parse_url($URI);
        // parse_url() returns false for seriously malformed URLs; fall back to
        // an empty array so such input ends up in the "Invalid protocol" branch
        // instead of being indexed as if it were an array.
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();
        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';
        // a URI without a scheme must not raise an undefined-index error
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

        switch(strtolower($scheme))
        {
            case "http":
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if(!$this->_connect($fp))
                    return false;

                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httprequest($URI,$fp,$URI,$this->_httpmethod);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
                }

                $this->_disconnect($fp);

                if($this->_redirectaddr)
                {
                    /* url was redirected, check if we've hit the max depth */
                    if($this->maxredirs > $this->_redirectdepth)
                    {
                        // only follow redirect if it's on this site, or offsiteok is true
                        if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                        {
                            /* follow the redirect */
                            $this->_redirectdepth++;
                            $this->lastredirectaddr=$this->_redirectaddr;
                            $this->fetch($this->_redirectaddr);
                        }
                    }
                }

                if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
                {
                    // work on a copy and reset the collector, because the
                    // nested fetch() calls fill $this->_frameurls again
                    $frameurls = $this->_frameurls;
                    $this->_frameurls = array();

                    // foreach replaces the each() idiom, which no longer exists in PHP 8
                    foreach($frameurls as $frameurl)
                    {
                        if($this->_framedepth < $this->maxframes)
                        {
                            $this->fetch($frameurl);
                            $this->_framedepth++;
                        }
                        else
                            break;
                    }
                }
                return true;

            case "https":
                // https is only available through an external cURL binary
                if(!$this->curl_path)
                    return false;
                if(function_exists("is_executable"))
                    if (!is_executable($this->curl_path))
                        return false;
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httpsrequest($URI,$URI,$this->_httpmethod);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httpsrequest($path, $URI, $this->_httpmethod);
                }

                if($this->_redirectaddr)
                {
                    /* url was redirected, check if we've hit the max depth */
                    if($this->maxredirs > $this->_redirectdepth)
                    {
                        // only follow redirect if it's on this site, or offsiteok is true
                        if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                        {
                            /* follow the redirect */
                            $this->_redirectdepth++;
                            $this->lastredirectaddr=$this->_redirectaddr;
                            $this->fetch($this->_redirectaddr);
                        }
                    }
                }

                if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
                {
                    $frameurls = $this->_frameurls;
                    $this->_frameurls = array();

                    foreach($frameurls as $frameurl)
                    {
                        if($this->_framedepth < $this->maxframes)
                        {
                            $this->fetch($frameurl);
                            $this->_framedepth++;
                        }
                        else
                            break;
                    }
                }
                return true;

            default:
                // not a valid protocol
                $this->error    =    'Invalid protocol "'.$scheme; // moodlefix
                return false;
        }
    }
/*======================================================================*\
    Function:    submit
    Purpose:    submit an http form over http or https, then follow a
                reported redirect (bounded by $maxredirs / $offsiteok)
                and fetch any collected frame URLs (bounded by $maxframes)
    Input:        $URI    the location to post the data
                $formvars    the formvars to use.
                    format: $formvars["var"] = "val";
                $formfiles  an array of files to submit
                    format: $formfiles["var"] = "/dir/filename.ext";
    Output:        $this->results    the text output from the post
                returns false when the connection fails, when https is
                requested but $curl_path is unset or not executable, or
                when the scheme is missing/unsupported ($this->error is
                set in the last case); returns true otherwise
\*======================================================================*/

    function submit($URI, $formvars="", $formfiles="")
    {
        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        $URI_PARTS = parse_url($URI);
        // parse_url() returns false for seriously malformed URLs; fall back to
        // an empty array so such input ends up in the "Invalid protocol" branch
        // instead of being indexed as if it were an array.
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();
        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';
        // a URI without a scheme must not raise an undefined-index error
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

        switch(strtolower($scheme))
        {
            case "http":
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if(!$this->_connect($fp))
                    return false;

                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httprequest($URI,$fp,$URI,$this->_submit_method,$this->_submit_type,$postdata);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }

                $this->_disconnect($fp);

                if($this->_redirectaddr)
                {
                    /* url was redirected, check if we've hit the max depth */
                    if($this->maxredirs > $this->_redirectdepth)
                    {
                        // a redirect target that does not start with our scheme is
                        // resolved against the host that was just contacted
                        if(!preg_match("|^".$scheme."://|", $this->_redirectaddr))
                            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$scheme."://".$URI_PARTS["host"]);

                        // only follow redirect if it's on this site, or offsiteok is true
                        if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                        {
                            /* follow the redirect */
                            $this->_redirectdepth++;
                            $this->lastredirectaddr=$this->_redirectaddr;
                            if( strpos( $this->_redirectaddr, "?" ) > 0 )
                                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                            else
                                $this->submit($this->_redirectaddr,$formvars, $formfiles);
                        }
                    }
                }

                if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
                {
                    // work on a copy and reset the collector, because the
                    // nested fetch() calls fill $this->_frameurls again
                    $frameurls = $this->_frameurls;
                    $this->_frameurls = array();

                    // foreach replaces the each() idiom, which no longer exists in PHP 8
                    foreach($frameurls as $frameurl)
                    {
                        if($this->_framedepth < $this->maxframes)
                        {
                            $this->fetch($frameurl);
                            $this->_framedepth++;
                        }
                        else
                            break;
                    }
                }
                return true;

            case "https":
                // https is only available through an external cURL binary
                if(!$this->curl_path)
                    return false;
                if(function_exists("is_executable"))
                    if (!is_executable($this->curl_path))
                        return false;
                $this->host = $URI_PARTS["host"];
                if(!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];
                if($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }

                if($this->_redirectaddr)
                {
                    /* url was redirected, check if we've hit the max depth */
                    if($this->maxredirs > $this->_redirectdepth)
                    {
                        if(!preg_match("|^".$scheme."://|", $this->_redirectaddr))
                            $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$scheme."://".$URI_PARTS["host"]);

                        // only follow redirect if it's on this site, or offsiteok is true
                        if(preg_match("|^http://".preg_quote($this->host)."|i",$this->_redirectaddr) || $this->offsiteok)
                        {
                            /* follow the redirect */
                            $this->_redirectdepth++;
                            $this->lastredirectaddr=$this->_redirectaddr;
                            if( strpos( $this->_redirectaddr, "?" ) > 0 )
                                $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                            else
                                $this->submit($this->_redirectaddr,$formvars, $formfiles);
                        }
                    }
                }

                if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
                {
                    $frameurls = $this->_frameurls;
                    $this->_frameurls = array();

                    foreach($frameurls as $frameurl)
                    {
                        if($this->_framedepth < $this->maxframes)
                        {
                            $this->fetch($frameurl);
                            $this->_framedepth++;
                        }
                        else
                            break;
                    }
                }
                return true;

            default:
                // not a valid protocol
                $this->error    =    'Invalid protocol "'.$scheme; //moodlefix
                return false;
        }
    }
/*======================================================================*\
    Function:    fetchlinks
    Purpose:    fetch the links from a web page
    Input:        $URI    where you are fetching from
    Output:        $this->results    an array of the URLs; when the fetch
                produced an array of documents, an array holding one
                list of URLs per document
                returns false if the fetch failed, true otherwise
\*======================================================================*/

    function fetchlinks($URI)
    {
        if (!$this->fetch($URI))
            return false;

        // relative links are resolved against the final address
        // when a redirect was followed
        if($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if(is_array($this->results))
        {
            for($x=0;$x<count($this->results);$x++)
            {
                $this->results[$x] = $this->_striplinks($this->results[$x]);
                // expand each document's links on their own, the same way
                // submitlinks() does, rather than handing the whole array
                // of link arrays to _expandlinks() in a single call
                if($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
        else
        {
            $this->results = $this->_striplinks($this->results);
            if($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }
        return true;
    }
/*======================================================================*\
    Function:    fetchform
    Purpose:    fetch a web page and keep only its form elements
    Input:        $URI    where you are fetching from
    Output:        $this->results    the resulting html form markup (each
                entry is reduced separately when results is an array)
                returns false if the fetch failed, true otherwise
\*======================================================================*/

    function fetchform($URI)
    {
        if (!$this->fetch($URI))
            return false;

        // single document: reduce it in place and finish
        if (!is_array($this->results))
        {
            $this->results = $this->_stripform($this->results);
            return true;
        }

        // several documents: reduce every entry by index
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_stripform($this->results[$i]);
            $i++;
        }

        return true;
    }
483     
484     
/*======================================================================*\
    Function:    fetchtext
    Purpose:    fetch a web page and reduce it to text via _striptext()
    Input:        $URI    where you are fetching from
    Output:        $this->results    the text from the web page (each
                entry is reduced separately when results is an array)
                returns false if the fetch failed, true otherwise
\*======================================================================*/

    function fetchtext($URI)
    {
        if (!$this->fetch($URI))
            return false;

        // single document: reduce it in place and finish
        if (!is_array($this->results))
        {
            $this->results = $this->_striptext($this->results);
            return true;
        }

        // several documents: reduce every entry by index
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_striptext($this->results[$i]);
            $i++;
        }

        return true;
    }
/*======================================================================*\
    Function:    submitlinks
    Purpose:    submit a form and grab the links from the response
    Input:        $URI    where you are submitting from
                $formvars    the formvars to use (passed on to submit)
                $formfiles    the files to submit (passed on to submit)
    Output:        $this->results    an array of the links from the post
                returns false if the submit failed, true otherwise
\*======================================================================*/

    function submitlinks($URI, $formvars="", $formfiles="")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // links are expanded against the final address when a redirect was followed
        $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

        // single document
        if (!is_array($this->results))
        {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $base);
            return true;
        }

        // several documents: extract (and optionally expand) per entry
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_striplinks($this->results[$i]);
            if ($this->expandlinks)
                $this->results[$i] = $this->_expandlinks($this->results[$i], $base);
            $i++;
        }

        return true;
    }
/*======================================================================*\
    Function:    submittext
    Purpose:    submit a form and reduce the response to text
    Input:        $URI    where you are submitting from
                $formvars    the formvars to use (passed on to submit)
                $formfiles    the files to submit (passed on to submit)
    Output:        $this->results    the text from the web page
                returns false if the submit failed, true otherwise
\*======================================================================*/

    function submittext($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // links are expanded against the final address when a redirect was followed
        $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

        // single document
        if (!is_array($this->results))
        {
            $this->results = $this->_striptext($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $base);
            return true;
        }

        // several documents: strip (and optionally expand) per entry
        $i = 0;
        while ($i < count($this->results))
        {
            $this->results[$i] = $this->_striptext($this->results[$i]);
            if ($this->expandlinks)
                $this->results[$i] = $this->_expandlinks($this->results[$i], $base);
            $i++;
        }

        return true;
    }
577     
/*======================================================================*\
    Function:    set_submit_multipart
    Purpose:    Switch the content type used for form submissions
                ($this->_submit_type) to multipart/form-data
\*======================================================================*/
    function set_submit_multipart()
    {
        $multipart = 'multipart/form-data';
        $this->_submit_type = $multipart;
    }
589     
/*======================================================================*\
    Function:    set_submit_normal
    Purpose:    Switch the content type used for form submissions
                ($this->_submit_type) back to
                application/x-www-form-urlencoded
\*======================================================================*/
    function set_submit_normal()
    {
        $urlencoded = 'application/x-www-form-urlencoded';
        $this->_submit_type = $urlencoded;
    }
600     
601     
603 /*======================================================================*\
604     Private functions
605 \*======================================================================*/
606     
607     
/*======================================================================*\
    Function:    _striplinks
    Purpose:    strip the hyperlinks from an html document
    Input:        $document    document to strip.
    Output:        $match        an array of the links: all quoted href
                            values first, then all unquoted ones;
                            an empty array when nothing was found
\*======================================================================*/

    function _striplinks($document)
    {
        preg_match_all("'<\s*a\s.*?href\s*=\s*            # find <a href=
                        ([\"\'])?                    # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s\>]+))        # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                        'isx",$document,$links);

        // start from an empty list so a document without links yields
        // array() rather than an undefined variable
        $match = array();

        // group 2 holds quoted href values, group 3 unquoted ones; for any
        // single anchor only one of them is non-empty
        $quoted = isset($links[2]) ? $links[2] : array();
        $unquoted = isset($links[3]) ? $links[3] : array();

        // catenate the non-empty matches from the conditional subpattern
        // (foreach replaces the each() idiom, which no longer exists in PHP 8)
        foreach($quoted as $val)
        {
            if(!empty($val))
                $match[] = $val;
        }

        foreach($unquoted as $val)
        {
            if(!empty($val))
                $match[] = $val;
        }

        // return the links
        return $match;
    }
/*======================================================================*\
    Function:    _stripform
    Purpose:    keep only the form markup of an html document: the
                FORM, INPUT, SELECT, TEXTAREA and OPTION tags (an
                OPTION tag together with the text that follows it)
    Input:        $document    document to strip.
    Output:        a string with the matched elements joined by "\r\n"
                (empty string when nothing matched)
\*======================================================================*/

    function _stripform($document)
    {
        $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

        preg_match_all($pattern, $document, $found);

        // index 0 holds the full text of every match; join them line by line
        return implode("\r\n", $found[0]);
    }
660     
661     
662 /*======================================================================*\
663     Function:    _striptext
664     Purpose:    strip the text from an html document
665     Input:        $document    document to strip.
666     Output:        $text        the resulting text
667 \*======================================================================*/
function _striptext($document)
{
    // Purpose: reduce an HTML document to plain text. Scripts and tags are
    //          removed, whitespace following a line break is collapsed and a
    //          fixed list of common entities is decoded.
    // Input:   $document  the HTML to strip
    // Output:  the resulting text; decoded characters are emitted as
    //          single bytes (ISO-8859-1 values, with chr(128) for the euro
    //          sign as in Windows-1252).
    //
    // The patterns are applied one after another, in array order. The
    // "&amp;" rule is deliberately LAST: if it ran first, "&amp;lt;" would
    // become "&lt;" and then be decoded a second time to "<".
    //
    // Entities are listed one by one (no //e evaluation), so anything not
    // listed here is left untouched.

    $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
                    "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                    "'([\r\n])[\s]+'",                    // strip out white space
                    "'&(quot|#34|#034|#x22);'i",        // replace html entities
                    "'&(lt|#60|#060|#x3c);'i",            // (decimal and hexadecimal forms)
                    "'&(gt|#62|#062|#x3e);'i",
                    "'&(nbsp|#160|#xa0);'i",
                    "'&(iexcl|#161);'i",
                    "'&(cent|#162);'i",
                    "'&(pound|#163);'i",
                    "'&(copy|#169);'i",
                    "'&(reg|#174);'i",
                    "'&(deg|#176);'i",
                    "'&(#39|#039|#x27);'",
                    "'&(euro|#8364);'i",                // europe
                    "'&a(uml|UML);'",                    // german
                    "'&o(uml|UML);'",
                    "'&u(uml|UML);'",
                    "'&A(uml|UML);'",
                    "'&O(uml|UML);'",
                    "'&U(uml|UML);'",
                    "'&szlig;'i",
                    "'&(amp|#38|#038|#x26);'i",            // must stay last, see above
                    );
    $replace = array(    "",
                        "",
                        "\\1",
                        "\"",
                        "<",
                        ">",
                        " ",
                        chr(161),
                        chr(162),
                        chr(163),
                        chr(169),
                        chr(174),
                        chr(176),
                        chr(39),
                        chr(128),
                        chr(228),    // a-umlaut
                        chr(246),    // o-umlaut
                        chr(252),    // u-umlaut
                        chr(196),    // A-umlaut
                        chr(214),    // O-umlaut
                        chr(220),    // U-umlaut
                        chr(223),    // sharp s
                        "&",
                    );

    $text = preg_replace($search,$replace,$document);

    return $text;
}
730 /*======================================================================*\
731     Function:    _expandlinks
732     Purpose:    expand each link into a fully qualified URL
733     Input:        $links            the links to qualify
734                 $URI            the full URI to get the base from
735     Output:        $expandedLinks    the expanded links
736 \*======================================================================*/
function _expandlinks($links,$URI)
{
    // Derive the base directory of $URI: drop the query string, then a
    // trailing "name.ext" path segment, then a trailing slash.
    preg_match("/^[^\?]+/", $URI, $uriNoQuery);

    $baseDir = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uriNoQuery[0]);
    $baseDir = preg_replace("|/$|", "", $baseDir);

    // scheme://host of the base, used for links that start with "/".
    $parts = parse_url($baseDir);
    $siteRoot = $parts["scheme"] . "://" . $parts["host"];

    // Rewrite rules, applied in this order to every link:
    //   1. drop a leading "http://<current host>"
    //   2. turn a leading "/" into "<site root>/"
    //   3. prefix anything not starting with http:// or mailto: with "<base dir>/"
    //   4. collapse "/./"
    //   5. collapse "/<segment>/../"
    $patterns = array(
        "|^http://" . preg_quote($this->host) . "|i",
        "|^(\/)|i",
        "|^(?!http://)(?!mailto:)|i",
        "|/\./|",
        "|/[^\/]+/\.\./|"
    );
    $substitutes = array(
        "",
        $siteRoot . "/",
        $baseDir . "/",
        "/",
        "/"
    );

    return preg_replace($patterns, $substitutes, $links);
}
768 /*======================================================================*\
769     Function:    _httprequest
770     Purpose:    go get the http data from the server
771     Input:        $url        the url to fetch
772                 $fp            the current open file pointer
773                 $URI        the full URI
774                 $body        body contents to send if any (POST)
775     Output:        true on success, false if the read timed out
776 \*======================================================================*/
777     
function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
{
    // Purpose: write one HTTP request to the already-open socket $fp, then
    //          read the response headers and body into this object.
    // Input:   $url           request target for the request line ("/" if empty)
    //          $fp            open socket handle
    //          $URI           the full URI (used for its scheme and as the
    //                         base when expanding redirect/frame links)
    //          $http_method   method word placed on the request line
    //          $content_type  Content-type header value, if any
    //          $body          request body, if any
    // Output:  true on success; false (with status -100) on a read timeout.
    //          Sets headers, status, response_code, results, _redirectaddr
    //          and, for framesets, _frameurls.

    $cookie_headers = '';
    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
    if(!empty($this->agent))
        $headers .= "User-Agent: ".$this->agent."\r\n";
    if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
        $headers .= "Host: ".$this->host;
        if(!empty($this->port))
            $headers .= ":".$this->port;
        $headers .= "\r\n";
    }
    if(!empty($this->accept))
        $headers .= "Accept: ".$this->accept."\r\n";
    if(!empty($this->referer))
        $headers .= "Referer: ".$this->referer."\r\n";
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_headers .= 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers .= substr($cookie_headers,0,-2) . "\r\n";
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): each() no longer exists in PHP 8 and,
        // without a reset(), it skipped every raw header on the second and
        // later requests (redirects, frames) of the same object.
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers .= $headerKey.": ".$headerVal."\r\n";
    }
    if(!empty($content_type)) {
        $headers .= "Content-type: $content_type";
        if ($content_type == "multipart/form-data")
            $headers .= "; boundary=".$this->_mime_boundary;
        $headers .= "\r\n";
    }
    if(!empty($body))
        $headers .= "Content-length: ".strlen($body)."\r\n";
    if(!empty($this->user) || !empty($this->pass))
        $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

    //add proxy auth headers
    if(!empty($this->proxy_user))
        $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

    // blank line terminates the header section
    $headers .= "\r\n";

    // set the read timeout if needed
    if ($this->read_timeout > 0)
        socket_set_timeout($fp, $this->read_timeout);
    $this->timed_out = false;

    fwrite($fp,$headers.$body,strlen($headers.$body));

    $this->_redirectaddr = false;
    // Reset to an empty array rather than unset(): if the server sends no
    // header lines the property must still be countable afterwards.
    $this->headers = array();

    while($currentHeader = fgets($fp,$this->_maxlinelen))
    {
        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // an empty line ends the response headers
        if($currentHeader == "\r\n")
            break;

        // if a header begins with Location: or URI:, set the redirect.
        // Whitespace after the colon is optional, and an empty value is
        // ignored instead of being turned into a bogus redirect.
        if(preg_match("/^(Location:|URI:)[ \t]*(.*)/i",chop($currentHeader),$matches) && $matches[2] !== '')
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: remember the numeric code and the raw line
        if(preg_match("|^HTTP/|",$currentHeader))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
            {
                $this->status= $status[1];
            }
            $this->response_code = $currentHeader;
        }

        $this->headers[] = $currentHeader;
    }

    // read the body in chunks of at most maxlength bytes until EOF
    $results = '';
    do {
        $_data = fread($fp, $this->maxlength);
        if ($_data === false || strlen($_data) == 0) {
            break;
        }
        $results .= $_data;
    } while(true);

    if ($this->read_timeout > 0 && $this->_check_timeout($fp))
    {
        $this->status=-100;
        return false;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
932 /*======================================================================*\
933     Function:    _httpsrequest
934     Purpose:    go get the https data from the server using curl
935     Input:        $url        the url to fetch
936                 $URI        the full URI
937                 $body        body contents to send if any (POST)
938     Output:        true on success, false if the cURL command fails
939 \*======================================================================*/
940     
function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
{
    // Purpose: fetch an https URI by running the external curl binary
    //          ($this->curl_path), then parse the dumped response headers
    //          and the body into this object.
    // Input:   $url           request target ("/" if empty; not sent to curl)
    //          $URI           the full URI handed to curl
    //          $http_method   unused here; curl builds the request line
    //          $content_type  Content-type header value, if any
    //          $body          request body, if any (sent with curl -d)
    // Output:  true on success; false (with $this->error set) if the temp
    //          file cannot be created or curl exits non-zero.
    //
    // Every value placed on the command line goes through escapeshellarg():
    // headers, body and URI can all carry attacker-controlled text, and the
    // earlier hand-rolled quoting could be broken out of.
    // NOTE(review): $this->curl_path is used as-is; it is assumed to be
    // trusted configuration.

    if($this->passcookies && $this->_redirectaddr)
        $this->setcookies();

    $headers = array();
    $cmdline_params = '';

    $URI_PARTS = parse_url($URI);
    if(empty($url))
        $url = "/";
    // GET ... header not needed for curl
    if(!empty($this->agent))
        $headers[] = "User-Agent: ".$this->agent;
    if(!empty($this->host))
    {
        if(!empty($this->port))
            $headers[] = "Host: ".$this->host.":".$this->port;
        else
            $headers[] = "Host: ".$this->host;
    }
    if(!empty($this->accept))
        $headers[] = "Accept: ".$this->accept;
    if(!empty($this->referer))
        $headers[] = "Referer: ".$this->referer;
    if(!empty($this->cookies))
    {
        if(!is_array($this->cookies))
            $this->cookies = (array)$this->cookies;

        reset($this->cookies);
        if ( count($this->cookies) > 0 ) {
            $cookie_str = 'Cookie: ';
            foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
            }
            // drop the trailing "; "
            $headers[] = substr($cookie_str,0,-2);
        }
    }
    if(!empty($this->rawheaders))
    {
        if(!is_array($this->rawheaders))
            $this->rawheaders = (array)$this->rawheaders;
        // foreach instead of each(): each() no longer exists in PHP 8 and
        // depended on the array's internal pointer.
        foreach($this->rawheaders as $headerKey => $headerVal)
            $headers[] = $headerKey.": ".$headerVal;
    }
    if(!empty($content_type)) {
        if ($content_type == "multipart/form-data")
            $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
        else
            $headers[] = "Content-type: $content_type";
    }
    if(!empty($body))
        $headers[] = "Content-length: ".strlen($body);
    if(!empty($this->user) || !empty($this->pass))
        $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

    foreach($headers as $header)
        $cmdline_params .= " -H ".escapeshellarg($header);

    if(!empty($body))
        $cmdline_params .= " -d ".escapeshellarg($body);

    if($this->read_timeout > 0)
        $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

    // The original passed an undefined local $temp_dir here.
    // NOTE(review): presumably the class has a temp_dir setting; it is not
    // visible from this method, so fall back to the system temp directory.
    $temp_dir = isset($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
    $headerfile = tempnam($temp_dir, "sno");
    if($headerfile === false)
    {
        $this->error = "Error: could not create a temporary file for the cURL headers.";
        return false;
    }

    // -k: skip certificate verification; -D: dump response headers to file
    $results = array();
    exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

    if($return)
    {
        // do not leave the header dump behind on failure
        unlink($headerfile);
        $this->error = "Error: cURL could not retrieve the document, error $return.";
        return false;
    }

    $results = implode("\r\n",$results);

    $result_headers = file($headerfile);
    unlink($headerfile);
    if($result_headers === false)
        $result_headers = array();

    $this->_redirectaddr = false;
    // Reset to an empty array rather than unset(), so the property stays
    // countable even when curl wrote no header lines.
    $this->headers = array();

    foreach($result_headers as $result_header)
    {
        // if a header begins with Location: or URI:, set the redirect.
        // Matched case-insensitively with optional whitespace after the
        // colon; the earlier pattern needed two whitespace characters after
        // "Location:" and so never matched an ordinary redirect.
        if(preg_match("/^(Location:|URI:)\s*(.*)/i",chop($result_header),$matches) && $matches[2] !== '')
        {
            // look for :// in the Location header to see if hostname is included
            if(!preg_match("|\:\/\/|",$matches[2]))
            {
                // no host in the path, so prepend
                $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                // eliminate double slash
                if(!preg_match("|^/|",$matches[2]))
                    $this->_redirectaddr .= "/".$matches[2];
                else
                    $this->_redirectaddr .= $matches[2];
            }
            else
                $this->_redirectaddr = $matches[2];
        }

        // status line: record the numeric code (as _httprequest does) and
        // the raw line
        if(preg_match("|^HTTP/|",$result_header))
        {
            if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$result_header,$status))
                $this->status = $status[1];
            $this->response_code = $result_header;
        }

        $this->headers[] = $result_header;
    }

    // check if there is a redirect meta tag

    if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
    {
        $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
    }

    // have we hit our frame depth and is there frame src to fetch?
    if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
    {
        $this->results[] = $results;
        for($x=0; $x<count($match[1]); $x++)
            $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
    }
    // have we already fetched framed content?
    elseif(is_array($this->results))
        $this->results[] = $results;
    // no framed content
    else
        $this->results = $results;

    return true;
}
1080 /*======================================================================*\
1081     Function:    setcookies()
1082     Purpose:    set cookies for a redirection
1083 \*======================================================================*/
1084     
function setcookies()
{
    // Walk the stored response headers and copy the name/value pair of
    // every Set-Cookie line (value URL-decoded) into $this->cookies.
    $headerCount = count($this->headers);
    for($idx = 0; $idx < $headerCount; ++$idx)
    {
        $line = $this->headers[$idx];
        if(!preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $line, $cookie))
            continue;

        $this->cookies[$cookie[1]] = urldecode($cookie[2]);
    }
}
1094     
1095 /*======================================================================*\
1096     Function:    _check_timeout
1097     Purpose:    checks whether timeout has occurred
1098     Input:        $fp    file pointer
1099 \*======================================================================*/
function _check_timeout($fp)
{
    // Returns true (and flags $this->timed_out) when a read timeout is
    // configured and the stream reports that it timed out; false otherwise.
    if(!($this->read_timeout > 0))
        return false;

    $meta = socket_get_status($fp);
    if(!$meta["timed_out"])
        return false;

    $this->timed_out = true;
    return true;
}
1113 /*======================================================================*\
1114     Function:    _connect
1115     Purpose:    make a socket connection
1116     Input:        $fp    file pointer
1117 \*======================================================================*/
1118     
function _connect(&$fp)
{
    // Purpose: open a socket to the target host, or to the proxy when both
    //          proxy_host and proxy_port are set.
    // Input:   $fp  (by reference) receives the opened socket handle
    // Output:  true on success; false on failure, with $this->status set to
    //          the fsockopen error number and $this->error to a message.

    if(!empty($this->proxy_host) && !empty($this->proxy_port))
    {
        $this->_isproxy = true;

        $host = $this->proxy_host;
        $port = $this->proxy_port;
    }
    else
    {
        $host = $this->host;
        $port = $this->port;
    }

    $this->status = 0;

    $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
    if($fp)
    {
        // socket connection succeeded
        return true;
    }

    // socket connection failed
    $this->status = $errno;
    switch($errno)
    {
        // Each case needs its own break: without them every branch fell
        // through to the default and the specific message was overwritten.
        case -3:
            $this->error="socket creation failed (-3)";
            break;
        case -4:
            $this->error="dns lookup failure (-4)";
            break;
        case -5:
            $this->error="connection refused or timed out (-5)";
            break;
        default:
            $this->error="connection failed (".$errno.")";
            break;
    }
    return false;
}
1166 /*======================================================================*\
1167     Function:    _disconnect
1168     Purpose:    disconnect a socket connection
1169     Input:        $fp    file pointer
1170 \*======================================================================*/
1171     
function _disconnect($fp)
{
    // Close the given handle and pass fclose()'s result back to the caller.
    $closed = fclose($fp);

    return $closed;
}
1177     
1178 /*======================================================================*\
1179     Function:    _prepare_post_body
1180     Purpose:    Prepare post body according to encoding type
1181     Input:        $formvars  - form variables
1182                 $formfiles - form upload files
1183     Output:        post body
1184 \*======================================================================*/
1185     
function _prepare_post_body($formvars, $formfiles)
{
    // Purpose: build the POST body for the current $this->_submit_type.
    // Input:   $formvars   form variables (name => value, or name => list of values)
    //          $formfiles  upload files (field name => path, or list of paths);
    //                      only used for multipart/form-data
    // Output:  the encoded body as a string; nothing (null) when both
    //          inputs are empty. Any other submit type yields an empty string.
    //          For multipart/form-data a fresh $this->_mime_boundary is
    //          generated as a side effect.

    settype($formvars, "array");
    settype($formfiles, "array");
    $postdata = '';

    if (count($formvars) == 0 && count($formfiles) == 0)
        return;

    // foreach is used throughout instead of each(), which no longer exists
    // in PHP 8.
    switch ($this->_submit_type) {
        case "application/x-www-form-urlencoded":
            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    // multi-valued field: emit key[]=value once per value
                    foreach ($val as $cur_val) {
                        $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                    }
                } else
                    $postdata .= urlencode($key)."=".urlencode($val)."&";
            }
            break;

        case "multipart/form-data":
            $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

            foreach ($formvars as $key => $val) {
                if (is_array($val) || is_object($val)) {
                    foreach ($val as $cur_val) {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        // {$key}[] — the earlier "\[\]" escaping put literal
                        // backslashes into the field name.
                        $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                        $postdata .= "$cur_val\r\n";
                    }
                } else {
                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                    $postdata .= "$val\r\n";
                }
            }

            foreach ($formfiles as $field_name => $file_names) {
                settype($file_names, "array");
                foreach ($file_names as $file_name) {
                    // unreadable files are skipped silently
                    if (!is_readable($file_name)) continue;

                    // file_get_contents reads in binary mode and copes with
                    // empty files, unlike fread($fp, filesize(...)).
                    $file_content = file_get_contents($file_name);
                    if ($file_content === false) continue;
                    $base_name = basename($file_name);

                    $postdata .= "--".$this->_mime_boundary."\r\n";
                    $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                    $postdata .= "$file_content\r\n";
                }
            }
            // closing boundary
            $postdata .= "--".$this->_mime_boundary."--\r\n";
            break;
    }

    return $postdata;
}