4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2016 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2016 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: pywwwget.py - Last Update: 6/17/2016 Ver. 0.4.7 RC 1 - Author: cooldude2k $
19 from __future__
import division
, absolute_import
, print_function
;
20 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, imp
;
21 import logging
as log
;
24 imp
.find_module('requests');
29 havemechanize
= False;
31 imp
.find_module('mechanize');
35 havemechanize
= False;
36 if(sys
.version
[0]=="2"):
38 from cStringIO
import StringIO
;
40 from StringIO
import StringIO
;
41 # From http://python-future.org/compatible_idioms.html
42 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
43 from urllib
import urlencode
;
44 from urllib2
import urlopen
, Request
, HTTPError
;
45 import urllib2
, urlparse
, cookielib
;
46 if(sys
.version
[0]>="3"):
47 from io
import StringIO
, BytesIO
;
48 # From http://python-future.org/compatible_idioms.html
49 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
50 from urllib
.request
import urlopen
, Request
;
51 from urllib
.error
import HTTPError
;
52 import urllib
.request
as urllib2
;
53 import urllib
.parse
as urlparse
;
54 import http
.cookiejar
as cookielib
;
# Project metadata and version bookkeeping.
__program_name__ = "PyWWW-Get"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, micro, release-tag-or-None, rc-number-or-None)
__version_info__ = (0, 4, 7, "RC 1", 1)
__version_date_info__ = (2016, 6, 17, "RC 1", 1)
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2)
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"
# Append "-<rc number>" to the date string on release-candidate builds.
# (Original used paired "if x != None:" / "if x == None:" statements;
# collapsed to if/else with identity comparison per PEP 8.)
if __version_info__[4] is not None:
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4])
else:
    __version_date_plusrc__ = __version_date__
# Human-readable version string, with the release tag when present.
if __version_info__[3] is not None:
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3])
else:
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])
# Prefix for the temporary download files created by the *_file_with_*
# helpers below (e.g. "py3wwwget0-").
tmpfileprefix = "py"+str(sys.version_info[0])+"wwwget"+str(__version_info__[0])+"-"
# NOTE(review): tmpfilesuffix is read by the download helpers
# (newtmpfilesuffix = tmpfilesuffix + hash) but its assignment was lost
# from this chunk; "-" restores the expected separator - confirm upstream.
tmpfilesuffix = "-"
pytempdir = tempfile.gettempdir()
# Shared cookie jar used as the default cookie store for requests.
geturls_cj = cookielib.CookieJar()
# Canned User-Agent strings for impersonating common browsers, plus this
# tool's own identifying UA strings.
geturls_ua_firefox_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3"
geturls_ua_chrome_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
geturls_ua_chromium_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36"
geturls_ua_palemoon_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3"
geturls_ua_opera_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.54"
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.52"
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko"
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__)
# Use the interpreter name platform reports, or "Python" when it reports
# an empty string.  (Original evaluated platform.python_implementation()
# in two separate, mutually exclusive top-level if blocks that built the
# same format string twice; collapsed to one evaluation.)
_pyimp = platform.python_implementation()
if _pyimp == "":
    _pyimp = "Python"
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=_pyimp, pyver=platform.python_version(), proname=__project__, prover=__version__)
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
# Module-wide default User-Agent.
geturls_ua = geturls_ua_firefox_windows7
# Default request-header sets, one per impersonated browser UA above.
# NOTE(review): the browser profiles ship a hard-coded Referer of
# "http://motherless.com/" - presumably a leftover from the author's own
# testing; confirm before reuse.  The bot/self profiles use google.com
# and disable compressed transfer ('Accept-Encoding': "none").
geturls_headers_firefox_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chromium_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_palemoon_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_vivaldi_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_internet_explorer_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide default header set, matching the default UA above.
geturls_headers = geturls_headers_firefox_windows7;
# Default inter-request sleep in seconds; used when callers pass sleep=-1.
geturls_download_sleep = 0;
def add_url_param(url, **params):
    """Return *url* with **params merged into its query string.

    Existing query parameters are preserved; keyword arguments are added,
    overwriting any existing parameter of the same name.
    """
    # Index 3 of a urlsplit() 5-tuple is the query component.
    n = 3
    parts = list(urlparse.urlsplit(url))
    # BUGFIX/modernization: cgi.parse_qsl has been deprecated since
    # Python 2.6 (the cgi module is removed in 3.13); urlparse.parse_qsl
    # is the supported equivalent and is already imported by this file.
    # Use parse_qs instead if list-valued parameters are needed.
    d = dict(urlparse.parse_qsl(parts[n]))
    d.update(params)
    parts[n] = urlencode(d)
    return urlparse.urlunsplit(parts)
# Make the script's own directory and the current working directory
# discoverable via PATH (which_exec below searches PATH).
_script_dir = os.path.dirname(os.path.realpath(__file__))
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + _script_dir + os.pathsep + os.getcwd()
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None implicitly when the file is not found on PATH.
    BUGFIX: the original split PATH on a hard-coded ":" and joined with
    "/", which breaks on Windows; os.pathsep / os.path.join are portable.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        fullpath = os.path.join(path, execfile)
        if os.path.exists(fullpath):
            return fullpath
def listize(varlist):
    """Build 1-based forward and reverse lookup dicts for *varlist*.

    Returns {1: reg, 2: rev, 'reg': reg, 'rev': rev} where reg maps
    1-based index -> value and rev maps value -> 1-based index.
    NOTE(review): the original while-loop scaffolding was lost from this
    chunk; reconstructed as an enumerate loop with the same 1-based
    public indices the surviving update() fragments show.
    """
    newlistreg = {}
    newlistrev = {}
    for ilx, value in enumerate(varlist, 1):
        newlistreg[ilx] = value
        newlistrev[value] = ilx
    newlistfull = {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev}
    return newlistfull
def twolistize(varlist):
    """Build 1-based lookup dicts for a list of (name, desc) pairs.

    Each item of *varlist* is indexed as item[0] (name) and item[1]
    (desc); both are stripped of surrounding whitespace.  Returns
    {1: nametab, 2: desctab, 'name': nametab, 'desc': desctab}, where
    each table has the same {1, 2, 'reg', 'rev'} shape as listize().
    NOTE(review): loop scaffolding was lost from this chunk and was
    reconstructed to match listize().
    """
    newlistnamereg = {}
    newlistnamerev = {}
    newlistdescreg = {}
    newlistdescrev = {}
    for ilx, pair in enumerate(varlist, 1):
        name = pair[0].strip()
        desc = pair[1].strip()
        newlistnamereg[ilx] = name
        newlistnamerev[name] = ilx
        newlistdescreg[ilx] = desc
        newlistdescrev[desc] = ilx
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev}
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev}
    newlistfull = {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp}
    return newlistfull
def arglistize(proexec, *varlist):
    """Build an argv-style list: *proexec* followed by flag/value pairs.

    Each item of *varlist* is a 2-item sequence (flag, value); a None in
    either position is skipped, so (flag, None) appends only the flag and
    (None, value) appends only the value.
    NOTE(review): loop scaffolding and the return were lost from this
    chunk; reconstructed from the surviving append fragments.
    """
    newarglist = [proexec]
    for arg in varlist:
        if arg[0] is not None:
            newarglist.append(arg[0])
        if arg[1] is not None:
            newarglist.append(arg[1])
    return newarglist
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a duration in seconds as H:MM:SS.ss."""
    hours = int(sec_elapsed / 3600)
    minutes = int((sec_elapsed % 3600) / 60)
    seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count to a human-readable size.

    *unit* selects IEC (1024-based: KiB, MiB, ...) or SI (1000-based:
    kB, MB, ...) prefixes; any other value falls back to IEC.  Returns a
    dict with 'Bytes' (original count), 'ReadableWithSuffix',
    'ReadableWithoutSuffix' and 'ReadableSuffix' keys.
    NOTE(review): the loop/branch scaffolding was lost from this chunk
    and was reconstructed around the surviving formatting statements.
    """
    if unit != "IEC" and unit != "SI":
        unit = "IEC"
    if unit == "IEC":
        units = [" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB"]
        unitswos = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"]
        unitsize = 1024.0
    if unit == "SI":
        units = [" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB"]
        unitswos = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB"]
        unitsize = 1000.0
    orgbytes = bytes
    for unitsuffix in units:
        if abs(bytes) < unitsize:
            strformat = "%3."+str(precision)+"f%s"
            pre_return_val = (strformat % (bytes, unitsuffix))
            # Strip trailing zero digits and a dangling decimal point
            # from the formatted number (e.g. "1.00 KiB" -> "1 KiB").
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
            alt_return_val = pre_return_val.split()
            return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
            return return_val
        bytes = bytes / unitsize
    # Fell through every prefix: report in yotta units ("YiB" literal as
    # in the original, even for SI).
    strformat = "%."+str(precision)+"f%s"
    pre_return_val = (strformat % (bytes, "YiB"))
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
    alt_return_val = pre_return_val.split()
    return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
    return return_val
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for a file, optionally with hashes.

    When *usehashes* is true, each comma-separated algorithm name in
    *usehashtypes* is computed over the whole file contents and added to
    the result dict under its upper-cased name (e.g. 'MD5', 'SHA1').
    NOTE(review): the loop scaffolding and return were lost from this
    chunk; reconstructed from the surviving statements.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = os.path.getsize(infile)
    return_val = get_readable_size(getfilesize, precision, unit)
    if usehashes:
        hashtypelist = usehashtypes.split(",")
        # Close the handle deterministically (original relied on GC).
        with open(infile, "rb") as openfile:
            filecontents = openfile.read()
        for hashtype in hashtypelist:
            hashtypelistup = hashtype.strip().upper()
            filehash = hashlib.new(hashtypelistup)
            filehash.update(filecontents)
            return_val.update({hashtypelistup: filehash.hexdigest()})
    return return_val
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for a string, optionally with hashes.

    Mirrors get_readable_size_from_file(): the size is len(instring) and,
    when *usehashes* is true, each algorithm in the comma-separated
    *usehashtypes* is added to the result under its upper-cased name.
    Under Python 3 the string is UTF-8 encoded before hashing.
    NOTE(review): loop scaffolding and the return were lost from this
    chunk; reconstructed from the surviving statements.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = len(instring)
    return_val = get_readable_size(getfilesize, precision, unit)
    if usehashes:
        for hashtype in usehashtypes.split(","):
            hashtypelistup = hashtype.strip().upper()
            filehash = hashlib.new(hashtypelistup)
            if sys.version[0] == "2":
                filehash.update(instring)
            if sys.version[0] >= "3":
                filehash.update(instring.encode('utf-8'))
            return_val.update({hashtypelistup: filehash.hexdigest()})
    return return_val
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of (name, value) tuples.

    A list passed in is returned unchanged.  The mutable default dict is
    kept for interface compatibility; it is never mutated here.
    """
    if isinstance(headers, dict):
        returnval = []
        # items() works on both Python 2 and 3; the original branched on
        # sys.version to call iteritems() under Python 2.
        for headkey, headvalue in headers.items():
            returnval.append((headkey, headvalue))
    elif isinstance(headers, list):
        returnval = headers
    return returnval
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to pycurl-style "Name: value" strings.

    A list passed in is returned unchanged.  The mutable default dict is
    kept for interface compatibility; it is never mutated here.
    """
    if isinstance(headers, dict):
        returnval = []
        # items() works on both Python 2 and 3; the original branched on
        # sys.version to call iteritems() under Python 2.
        for headkey, headvalue in headers.items():
            returnval.append(headkey+": "+headvalue)
    elif isinstance(headers, list):
        returnval = headers
    return returnval
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) header tuples to a dict.

    A dict passed in is returned unchanged.  The mutable default list is
    kept for interface compatibility; it is never mutated here.
    NOTE(review): loop scaffolding and the return were lost from this
    chunk; reconstructed from the surviving update() fragment.
    """
    if isinstance(headers, list):
        returnval = {}
        for headkey, headvalue in headers:
            returnval.update({headkey: headvalue})
    elif isinstance(headers, dict):
        returnval = headers
    return returnval
def get_httplib_support(checkvalue=None):
    """List usable HTTP backends, or check a single one by name.

    With checkvalue=None, returns a list such as ["urllib", "requests",
    "mechanize"], filtered by the haverequests/havemechanize module
    flags.  With a backend name, returns True/False for availability;
    "urllib1" and "urllib2" are accepted as aliases for "urllib".
    NOTE(review): the return statements were lost from this chunk and
    were reconstructed from check_httplib_support()'s usage.
    """
    global haverequests, havemechanize
    returnval = []
    returnval.append("urllib")
    if haverequests:
        returnval.append("requests")
    if havemechanize:
        returnval.append("mechanize")
    if checkvalue is not None:
        if checkvalue == "urllib1" or checkvalue == "urllib2":
            checkvalue = "urllib"
        return checkvalue in returnval
    return returnval
def check_httplib_support(checkvalue="urllib"):
    """Return True when the named HTTP backend is available.

    "urllib1"/"urllib2" are normalized to "urllib" before delegating to
    get_httplib_support().  NOTE(review): the return was lost from this
    chunk and was reconstructed.
    """
    if checkvalue == "urllib1" or checkvalue == "urllib2":
        checkvalue = "urllib"
    returnval = get_httplib_support(checkvalue)
    return returnval
def get_httplib_support_list():
    """Return the full list of available HTTP backend names.

    NOTE(review): the return was lost from this chunk and was
    reconstructed.
    """
    returnval = get_httplib_support(None)
    return returnval
def download_from_url(httpurl, httpheaders, httpcookie, httplibuse="urllib", sleep=-1):
    """Download *httpurl* in memory via the chosen backend.

    *httplibuse* selects "urllib", "requests" or "mechanize"; backends
    that are not installed fall back to urllib.  sleep<0 selects the
    module default geturls_download_sleep.  Returns the backend's result
    dict, or False for an unrecognized backend name.
    NOTE(review): the sleep guard and return were lost from this chunk
    and were reconstructed to match the sibling dispatchers.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if haverequests is False and httplibuse == "requests":
        httplibuse = "urllib"
    if havemechanize is False and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", buffersize=524288, sleep=-1):
    """Download *httpurl* to a temporary file via the chosen backend.

    Same backend selection and fallback rules as download_from_url();
    *buffersize* is the per-read chunk size.  Returns the backend's
    result dict, or False for an unrecognized backend name.
    NOTE(review): the sleep guard and return were lost from this chunk
    and were reconstructed to match the sibling dispatchers.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if haverequests is False and httplibuse == "requests":
        httplibuse = "urllib"
    if havemechanize is False and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_to_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* to outpath/outfile via the chosen backend.

    outfile "-" keeps the content in memory.  *buffersize* is a 2-item
    list: [download chunk size, copy chunk size].  Same backend fallback
    rules as download_from_url().  Returns the backend's result dict, or
    False for an unrecognized backend name.
    NOTE(review): the sleep guard and return were lost from this chunk
    and were reconstructed to match the sibling dispatchers.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if haverequests is False and httplibuse == "requests":
        httplibuse = "urllib"
    if havemechanize is False and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep=-1):
    """Fetch *httpurl* with urllib2/urllib.request and return a result dict.

    The dict has 'Type', 'Content', 'Headers', 'URL' and 'Code' keys;
    gzip/deflate-encoded bodies are decompressed transparently.
    *httpcookie* is a cookielib CookieJar; a dict of headers is converted
    to the (name, value) list form the opener expects.
    NOTE(review): the sleep guard, time.sleep call and return were lost
    from this chunk and were reconstructed.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    geturls_text = geturls_opener.open(httpurl)
    log.info("Downloading URL "+httpurl)
    if geturls_text.info().get("Content-Encoding") == "gzip" or geturls_text.info().get("Content-Encoding") == "deflate":
        # Buffer the compressed body: StringIO under py2, BytesIO under py3.
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if geturls_text.info().get("Content-Encoding") != "gzip" and geturls_text.info().get("Content-Encoding") != "deflate":
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
    """Download *httpurl* to a uniquely-named temporary file via urllib.

    The temp file name is tmpfileprefix + tmpfilesuffix + sha1(url,
    buffersize, start time).  Returns a dict with 'Filename', 'Filesize',
    'FilesizeAlt', 'Headers', 'URL', 'Code' and timing keys; the caller
    is responsible for deleting the file.
    NOTE(review): loop scaffolding, counters, the sleep guard and the
    return were lost from this chunk; reconstructed from the surviving
    statements.  'DownloadTime' is exec_time_start - exec_time_end as in
    the visible fragments, which yields a negative value - confirm
    whether upstream intended end - start.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    geturls_text = geturls_opener.open(httpurl)
    downloadsize = geturls_text.info().get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            # BUGFIX: guard the percentage against an unknown (0)
            # Content-Length to avoid ZeroDivisionError.
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* to outpath/outfile via urllib.

    outfile "-" returns the content in memory ('Type': "Content")
    instead of moving the temp file into place ('Type': "File").
    Returns False when outpath is an existing file or the target path is
    an existing directory.
    NOTE(review): several statements (guards, buffer setup, counters,
    returns) were lost from this chunk and were reconstructed to match
    the sibling urllib helpers; the near-identical Python 2/3 copy
    branches were merged on the StringIO/BytesIO choice.  Timing values
    use exec_time_start - exec_time_end (negative) as in the visible
    fragments - confirm upstream intent.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # File mode: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # In-memory mode: download to a temp file, copy into a buffer,
        # then delete the temp file.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # str buffer under Python 2, bytes buffer under Python 3.
        if sys.version[0] == "2":
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                # Guard against a zero-length file to avoid
                # ZeroDivisionError in the progress percentage.
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if haverequests:
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch *httpurl* with requests and return a result dict.

        Mirrors download_from_url_with_urllib(): the dict has 'Type',
        'Content', 'Headers', 'URL' and 'Code' keys.
        NOTE(review): the sleep guard, time.sleep call and return were
        lost from this chunk and were reconstructed.  This branch keys
        the decompression check off 'Content-Type' where the urllib
        variant checks 'Content-Encoding' - looks wrong; confirm
        upstream before changing the header name.
        """
        global geturls_download_sleep
        if sleep < 0:
            sleep = geturls_download_sleep
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        time.sleep(sleep)
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
        log.info("Downloading URL "+httpurl)
        if geturls_text.headers.get('Content-Type') == "gzip" or geturls_text.headers.get('Content-Type') == "deflate":
            if sys.version[0] == "2":
                strbuf = StringIO(geturls_text.content)
            if sys.version[0] >= "3":
                strbuf = BytesIO(geturls_text.content)
            gzstrbuf = gzip.GzipFile(fileobj=strbuf)
            # BUGFIX: GzipFile has no .content attribute (AttributeError
            # in the original); read() the decompressed stream, matching
            # the urllib variant.
            returnval_content = gzstrbuf.read()[:]
        if geturls_text.headers.get('Content-Type') != "gzip" and geturls_text.headers.get('Content-Type') != "deflate":
            returnval_content = geturls_text.content[:]
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        geturls_text.close()
        return returnval
if not haverequests:
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback when requests is unavailable: delegate to urllib.

        NOTE(review): the return was lost from this chunk and was
        reconstructed.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
        return returnval
if haverequests:
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Download *httpurl* to a temporary file with requests (streaming).

        Mirrors download_from_url_file_with_urllib(): returns a dict with
        'Filename', 'Filesize', 'FilesizeAlt', 'Headers', 'URL', 'Code'
        and timing keys; the caller deletes the file.
        NOTE(review): loop scaffolding, counters, the sleep guard and the
        return were lost from this chunk and were reconstructed.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Hash url + buffersize + start time into a unique temp suffix.
        myhash = hashlib.new("sha1")
        if sys.version[0] == "2":
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if sys.version[0] >= "3":
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if sleep < 0:
            sleep = geturls_download_sleep
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        time.sleep(sleep)
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        # BUGFIX: the original called int() on headers.get('Content-Length')
        # before its None check, raising TypeError whenever the header was
        # absent; convert only after the check (matches the urllib variant).
        downloadsize = geturls_text.headers.get('Content-Length')
        if downloadsize is not None:
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                # Guard against an unknown (0) Content-Length.
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        geturls_text.close()
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
if(haverequests==False):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Fallback used when the requests module is unavailable.

        Delegates to the urllib-based file downloader with identical
        arguments and returns its result dict unchanged.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
if(haverequests==True):
    def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Download httpurl with requests and either move the result into
        outpath/outfile, or (when outfile == "-") return the body in memory.

        buffersize is a two-element list: [download chunk size, copy chunk
        size].  Returns a result dict on success, or False when the output
        location is unusable (outpath is an existing file, or the target
        path is a directory).
        """
        global geturls_download_sleep
        if(sleep<0):
            sleep = geturls_download_sleep
        if(not outfile=="-"):
            # Download to a temp file, then move it into its final place.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
            tmpfilename = pretmpfilename['Filename']
            downloadsize = os.path.getsize(tmpfilename)
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            exec_time_end = time.time()
            # BUG FIX: duration is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)==True):
                os.remove(tmpfilename)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        if(outfile=="-"):
            # Stream the downloaded temp file back into an in-memory buffer.
            # The former near-identical py2/py3 branches are merged; only the
            # buffer type differs (py2 file reads yield str, py3 yield bytes).
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
            tmpfilename = pretmpfilename['Filename']
            downloadsize = os.path.getsize(tmpfilename)
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            if(sys.version[0]=="2"):
                f = StringIO()
            else:
                f = BytesIO()
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    # Only report progress when the total size is known.
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                        downloaddiff = fulldatasize - prevdownsize
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            fdata = f.getvalue()
            f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            # BUG FIX: duration is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        return returnval
if(haverequests==False):
    def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests module is unavailable.

        Delegates to the urllib-based to-file downloader and returns its
        result unchanged.
        """
        # BUG FIX: arguments were previously passed positionally in the
        # wrong order (buffersize landed in the urllib function's outfile
        # slot); pass them by keyword so each lands in the right parameter.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
if(havemechanize==True):
    def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
        """Download httpurl with mechanize and return the whole body in memory.

        Returns a dict: Type, Content (decompressed when the response is
        gzip/deflate encoded), Headers, URL and Code.
        """
        global geturls_download_sleep
        if(sleep<0):
            sleep = geturls_download_sleep
        geturls_opener = mechanize.Browser()
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders)
        time.sleep(sleep)
        geturls_opener.addheaders = httpheaders
        geturls_opener.set_cookiejar(httpcookie)
        geturls_opener.set_handle_robots(False)
        geturls_text = geturls_opener.open(httpurl)
        log.info("Downloading URL "+httpurl)
        if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
            # NOTE(review): GzipFile only understands the gzip container, not
            # raw deflate streams — a "deflate" response may fail here; verify.
            if(sys.version[0]=="2"):
                strbuf = StringIO(geturls_text.read())
            if(sys.version[0]>="3"):
                strbuf = BytesIO(geturls_text.read())
            gzstrbuf = gzip.GzipFile(fileobj=strbuf)
            returnval_content = gzstrbuf.read()[:]
        if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
            returnval_content = geturls_text.read()[:]
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
        geturls_text.close()
        # BUG FIX: the assembled result was never returned in the old code.
        return returnval
if(havemechanize==False):
    def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback used when the mechanize module is unavailable.

        Delegates to the urllib-based implementation with identical
        arguments and returns its result dict unchanged.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
if(havemechanize==True):
    def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Download httpurl with mechanize, streaming the body into a named
        temporary file.

        Parameters: httpurl (str), httpheaders (list of pairs or dict),
        httpcookie (cookie jar), buffersize (bytes per read),
        sleep (pre-request delay; <0 means use geturls_download_sleep).

        Returns a dict: Type/Filename/Filesize/FilesizeAlt/Headers/URL/Code
        plus DownloadTime fields added after the transfer completes.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Unique temp-file suffix derived from the url, buffer size and start time.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep<0):
            sleep = geturls_download_sleep
        geturls_opener = mechanize.Browser()
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders)
        time.sleep(sleep)
        geturls_opener.addheaders = httpheaders
        geturls_opener.set_cookiejar(httpcookie)
        geturls_opener.set_handle_robots(False)
        geturls_text = geturls_opener.open(httpurl)
        # BUG FIX: Content-Length may be absent; int(None) raised TypeError
        # before, which also made the following None check unreachable.
        downloadsize = geturls_text.info().get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
            while True:
                databytes = geturls_text.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                # Only report progress when the total size is known (avoids /0).
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        geturls_text.close()
        exec_time_end = time.time()
        # BUG FIX: duration is end - start (was start - end, yielding negatives).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
if(havemechanize==False):
    def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Fallback used when the mechanize module is unavailable.

        Delegates to the urllib-based file downloader with identical
        arguments and returns its result dict unchanged.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
if(havemechanize==True):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Download httpurl with mechanize and either move the result into
        outpath/outfile, or (when outfile == "-") return the body in memory.

        buffersize is a two-element list: [download chunk size, copy chunk
        size].  Returns a result dict on success, or False when the output
        location is unusable (outpath is an existing file, or the target
        path is a directory).
        """
        global geturls_download_sleep
        if(sleep<0):
            sleep = geturls_download_sleep
        if(not outfile=="-"):
            # Download to a temp file, then move it into its final place.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
            tmpfilename = pretmpfilename['Filename']
            downloadsize = os.path.getsize(tmpfilename)
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            exec_time_end = time.time()
            # BUG FIX: duration is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)==True):
                os.remove(tmpfilename)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        if(outfile=="-"):
            # Stream the downloaded temp file back into an in-memory buffer.
            # The former near-identical py2/py3 branches are merged; only the
            # buffer type differs (py2 file reads yield str, py3 yield bytes).
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
            tmpfilename = pretmpfilename['Filename']
            downloadsize = os.path.getsize(tmpfilename)
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            if(sys.version[0]=="2"):
                f = StringIO()
            else:
                f = BytesIO()
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    # Only report progress when the total size is known.
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                        downloaddiff = fulldatasize - prevdownsize
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            fdata = f.getvalue()
            f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            # BUG FIX: duration is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        return returnval
if(havemechanize==False):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the mechanize module is unavailable.

        Delegates to the urllib-based to-file downloader and returns its
        result unchanged.
        """
        # BUG FIX: arguments were previously passed positionally in the
        # wrong order (buffersize landed in the urllib function's outfile
        # slot); pass them by keyword so each lands in the right parameter.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval