4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2016 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2016 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: pywwwget.py - Last Update: 6/17/2016 Ver. 0.4.7 RC 1 - Author: cooldude2k $
19 from __future__
import division
, absolute_import
, print_function
;
20 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, imp
;
21 import logging
as log
;
# Optional third-party HTTP backends: probe availability at import time so
# the download functions can fall back to urllib when a backend is missing.
# NOTE(review): replaces the original imp.find_module() probing (the imp
# module was removed in Python 3.12) with direct try-imports; the visible
# fragment also assigned havemechanize in the requests probe, which would
# leave haverequests undefined -- fixed here.
try:
    import requests
    haverequests = True
except ImportError:
    haverequests = False
try:
    import mechanize
    havemechanize = True
except ImportError:
    havemechanize = False
36 if(sys
.version
[0]=="2"):
38 from cStringIO
import StringIO
;
40 from StringIO
import StringIO
;
41 # From http://python-future.org/compatible_idioms.html
42 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
43 from urllib
import urlencode
;
44 from urllib2
import urlopen
, Request
, HTTPError
;
45 import urllib2
, urlparse
, cookielib
;
46 if(sys
.version
[0]>="3"):
47 from io
import StringIO
, BytesIO
;
48 # From http://python-future.org/compatible_idioms.html
49 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
50 from urllib
.request
import urlopen
, Request
;
51 from urllib
.error
import HTTPError
;
52 import urllib
.request
as urllib2
;
53 import urllib
.parse
as urlparse
;
54 import http
.cookiejar
as cookielib
;
__program_name__ = "PyWWW-Get"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, micro, release tag or None, RC number or None)
__version_info__ = (0, 4, 7, "RC 1", 1)
# (year, month, day, release tag or None, RC number or None)
__version_date_info__ = (2016, 6, 17, "RC 1", 1)
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2)
# Use "is (not) None" instead of the original ==None/!=None comparisons and
# fold each paired if/if into if/else.
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4])
else:
    __version_date_plusrc__ = __version_date__
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3])
else:
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])
# Prefix for NamedTemporaryFile names, e.g. "py3wwwget0-" on Python 3.
tmpfileprefix = "py" + str(sys.version_info[0]) + "wwwget" + str(__version_info__[0]) + "-"
# System temporary directory used for intermediate downloads.
pytempdir = tempfile.gettempdir()
# Shared cookie jar used by the urllib-based download functions.
geturls_cj = cookielib.CookieJar()
# Browser User-Agent presets (all "Windows 7" variants).
geturls_ua_firefox_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3"
geturls_ua_chrome_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
geturls_ua_chromium_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36"
geturls_ua_midori_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/538.15 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/538.15 Midori/0.5"
geturls_ua_palemoon_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3"
geturls_ua_opera_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.54"
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36 Vivaldi/1.2.490.43"
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko"
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__)
# platform.python_implementation() may return ""; substitute "Python" then.
# (Replaces the original's two duplicated if-branch format() calls.)
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=(platform.python_implementation() or "Python"), pyver=platform.python_version(), proname=__project__, prover=__version__)
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
geturls_ua = geturls_ua_firefox_windows7

def _make_geturls_headers(useragent, acceptencoding):
    # All header presets are identical apart from User-Agent/Accept-Encoding;
    # build them in one place instead of 13 copy-pasted dict literals.
    return {'Referer': "http://google.com/", 'User-Agent': useragent, 'Accept-Encoding': acceptencoding, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}

geturls_headers_firefox_windows7 = _make_geturls_headers(geturls_ua_firefox_windows7, "none")
geturls_headers_seamonkey_windows7 = _make_geturls_headers(geturls_ua_seamonkey_windows7, "none")
geturls_headers_chrome_windows7 = _make_geturls_headers(geturls_ua_chrome_windows7, "none")
geturls_headers_chromium_windows7 = _make_geturls_headers(geturls_ua_chromium_windows7, "none")
geturls_headers_midori_windows7 = _make_geturls_headers(geturls_ua_midori_windows7, "none")
geturls_headers_palemoon_windows7 = _make_geturls_headers(geturls_ua_palemoon_windows7, "gzip, deflate")
geturls_headers_opera_windows7 = _make_geturls_headers(geturls_ua_opera_windows7, "gzip, deflate")
geturls_headers_vivaldi_windows7 = _make_geturls_headers(geturls_ua_vivaldi_windows7, "gzip, deflate")
geturls_headers_internet_explorer_windows7 = _make_geturls_headers(geturls_ua_internet_explorer_windows7, "none")
geturls_headers_pywwwget_python = _make_geturls_headers(geturls_ua_pywwwget_python, "none")
geturls_headers_pywwwget_python_alt = _make_geturls_headers(geturls_ua_pywwwget_python_alt, "none")
geturls_headers_googlebot_google = _make_geturls_headers(geturls_ua_googlebot_google, "none")
geturls_headers_googlebot_google_old = _make_geturls_headers(geturls_ua_googlebot_google_old, "none")
geturls_headers = geturls_headers_firefox_windows7
# Seconds to pause before each request; 0 disables throttling.
geturls_download_sleep = 0
def add_url_param(url, **params):
    """Return *url* with *params* merged into its query string.

    Existing query keys are kept; keys given in params override duplicates.
    """
    n = 3  # index of the query component in urlsplit results
    parts = list(urlparse.urlsplit(url))
    # urlparse.parse_qsl exists on both Python 2 and 3; cgi.parse_qsl was
    # removed from the stdlib.  (Use parse_qs instead for list values.)
    d = dict(urlparse.parse_qsl(parts[n]))
    d.update(params)
    parts[n] = urlencode(d)
    return urlparse.urlunsplit(parts)
# Make executables next to this script and in the current working directory
# findable through os.environ["PATH"] lookups.
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], os.path.dirname(os.path.realpath(__file__)), os.getcwd()])
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None when the executable is not found.  Uses os.pathsep and
    os.path.join instead of the original hard-coded ":" and "/", which
    broke PATH lookups on Windows.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
def listize(varlist):
    """Build 1-based lookup tables for *varlist*.

    Returns a dict holding the forward mapping (index -> value) under keys
    1/'reg' and the reverse mapping (value -> index) under keys 2/'rev'.
    Replaces the original manual while-loop counters with enumerate().
    """
    newlistreg = {}
    newlistrev = {}
    for ilx, value in enumerate(varlist, 1):
        newlistreg[ilx] = value
        newlistrev[value] = ilx
    return {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev}
def twolistize(varlist):
    """Build 1-based name and description lookup tables for *varlist*.

    Each item of varlist is a (name, desc) pair; both fields are stripped.
    Returns a dict with the name tables under keys 1/'name' and the
    description tables under keys 2/'desc', each in listize() layout.
    """
    newlistnamereg = {}
    newlistnamerev = {}
    newlistdescreg = {}
    newlistdescrev = {}
    for ilx, item in enumerate(varlist, 1):
        name = item[0].strip()
        desc = item[1].strip()
        newlistnamereg[ilx] = name
        newlistnamerev[name] = ilx
        newlistdescreg[ilx] = desc
        newlistdescrev[desc] = ilx
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev}
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev}
    return {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp}
def arglistize(proexec, *varlist):
    """Flatten (flag, value) pairs into an argv-style list headed by *proexec*.

    A None in either position of a pair is skipped.  Replaces the original
    manual while-loop indexing with a plain for-loop.
    """
    newarglist = [proexec]
    for argpair in varlist:
        if argpair[0] is not None:
            newarglist.append(argpair[0])
        if argpair[1] is not None:
            newarglist.append(argpair[1])
    return newarglist
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a duration in seconds as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / (60 * 60))
    minutes = int((sec_elapsed % (60 * 60)) / 60)
    seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size dict.

    unit is "IEC" (1024-based: KiB/MiB/...) or "SI" (1000-based: kB/MB/...);
    anything else falls back to IEC.  Returns a dict with the original count
    ('Bytes'), the formatted string ('ReadableWithSuffix') and its number
    ('ReadableWithoutSuffix') and suffix ('ReadableSuffix') parts.

    NOTE(review): partially reconstructed from an incomplete extraction; the
    cleanup regexes and dict layout follow the visible fragments.
    """
    if unit != "IEC" and unit != "SI":
        unit = "IEC"
    if unit == "IEC":
        units = [" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB"]
        unitswos = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"]
        unitsize = 1024.0
    if unit == "SI":
        units = [" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB"]
        unitswos = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB"]
        unitsize = 1000.0
    orgbytes = bytes
    for unitsuffix in units:
        if abs(bytes) < unitsize:
            strformat = "%3." + str(precision) + "f%s"
            pre_return_val = strformat % (bytes, unitsuffix)
            # Strip trailing zeros and a dangling decimal point,
            # e.g. "1.00 KiB" -> "1 KiB".
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
            alt_return_val = pre_return_val.split()
            return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
        bytes /= unitsize
    # Larger than the table covers: report in the largest supported unit.
    strformat = "%." + str(precision) + "f%s"
    pre_return_val = strformat % (bytes, "YiB")
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
    alt_return_val = pre_return_val.split()
    return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() of *infile*'s size; optionally add digests.

    When usehashes is true, a hex digest of the file contents is added to the
    result for each algorithm named in the comma-separated *usehashtypes*,
    keyed by the upper-cased algorithm name.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = os.path.getsize(infile)
    return_val = get_readable_size(getfilesize, precision, unit)
    if usehashes:
        # Read once inside a with-block (the original opened the file without
        # a context manager) and hash the same bytes per algorithm.
        with open(infile, "rb") as openfile:
            filecontents = openfile.read()
        for hashtypename in usehashtypes.split(","):
            hashtypelistup = hashtypename.strip().upper()
            filehash = hashlib.new(hashtypelistup)
            filehash.update(filecontents)
            return_val.update({hashtypelistup: filehash.hexdigest()})
    return return_val
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() of len(*instring*); optionally add digests.

    When usehashes is true, a hex digest of the string is added for each
    algorithm in the comma-separated *usehashtypes*, keyed by upper-cased
    algorithm name.  On Python 3 the string is UTF-8 encoded before hashing.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = len(instring)
    return_val = get_readable_size(getfilesize, precision, unit)
    if usehashes:
        # sys.version_info instead of the original sys.version[0] string
        # compare (which would misfire on a hypothetical Python 10).
        if sys.version_info[0] == 2:
            hashdata = instring
        else:
            hashdata = instring.encode('utf-8')
        for hashtypename in usehashtypes.split(","):
            hashtypelistup = hashtypename.strip().upper()
            filehash = hashlib.new(hashtypelistup)
            filehash.update(hashdata)
            return_val.update({hashtypelistup: filehash.hexdigest()})
    return return_val
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of (name, value) tuples.

    This is the format urllib openers take via .addheaders.  A list passes
    through unchanged; any other type yields False.
    """
    if isinstance(headers, dict):
        # .items() works on both Python 2 and 3; the original's
        # iteritems()/items() version split is unnecessary.
        returnval = list(headers.items())
    elif isinstance(headers, list):
        returnval = headers
    else:
        returnval = False
    return returnval
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of "Name: value" strings.

    This is the format pycurl's HTTPHEADER option takes.  A list passes
    through unchanged; any other type yields False.
    """
    if isinstance(headers, dict):
        # .items() works on both Python 2 and 3; no version split needed.
        returnval = [headkey + ": " + headvalue for headkey, headvalue in headers.items()]
    elif isinstance(headers, list):
        returnval = headers
    else:
        returnval = False
    return returnval
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) pairs into a header dict.

    A dict passes through unchanged; any other type yields False.  Replaces
    the original manual while-loop indexing with a generator expression.
    """
    if isinstance(headers, list):
        returnval = dict((hditem[0], hditem[1]) for hditem in headers)
    elif isinstance(headers, dict):
        returnval = headers
    else:
        returnval = False
    return returnval
def get_httplib_support(checkvalue=None):
    """List usable HTTP backends, or test one by name.

    With checkvalue None, returns the list of usable backend names ("urllib"
    always; "requests"/"mechanize" when importable).  With checkvalue given,
    returns True/False for that backend ("urllib1"/"urllib2" count as
    "urllib").
    """
    global haverequests, havemechanize
    returnval = ["urllib"]
    if haverequests:
        returnval.append("requests")
    if havemechanize:
        returnval.append("mechanize")
    # "is not None" instead of the original "not checkvalue==None".
    if checkvalue is not None:
        if checkvalue == "urllib1" or checkvalue == "urllib2":
            checkvalue = "urllib"
        returnval = checkvalue in returnval
    return returnval
def check_httplib_support(checkvalue="urllib"):
    """Return True/False for whether *checkvalue* names a usable backend.

    "urllib1" and "urllib2" are treated as aliases of "urllib".
    """
    if checkvalue == "urllib1" or checkvalue == "urllib2":
        checkvalue = "urllib"
    return get_httplib_support(checkvalue)
def get_httplib_support_list():
    """Return the list of usable HTTP backend names."""
    return get_httplib_support(None)
def download_from_url(httpurl, httpheaders, httpcookie, httplibuse="urllib", sleep=-1):
    """Download *httpurl* and return its content-info dict.

    httplibuse selects the backend ("urllib", "requests" or "mechanize");
    an unavailable backend silently falls back to urllib, and the aliases
    "urllib1"/"urllib2" map to "urllib".  sleep < 0 means use the module
    default geturls_download_sleep.  Returns False for an unknown backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    # Fall back to urllib when the requested backend failed to import.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", buffersize=524288, sleep=-1):
    """Download *httpurl* to a temporary file via the selected backend.

    Same backend selection and fallback rules as download_from_url.
    buffersize is the read-chunk size in bytes; sleep < 0 means use the
    module default geturls_download_sleep.  Returns False for an unknown
    backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_to_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* to outpath/outfile via the selected backend.

    outfile "-" returns the content in memory instead of writing a file.
    buffersize is [download-chunk, copy-chunk].  Same backend selection and
    fallback rules as download_from_url; sleep < 0 means use the module
    default geturls_download_sleep.  Returns False for an unknown backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep=-1):
    """Fetch *httpurl* with urllib2/urllib.request.

    Returns a dict with the body ('Content'), response 'Headers', final
    'URL' and status 'Code'.  sleep < 0 means use the module default
    geturls_download_sleep.

    NOTE(review): partially reconstructed from an incomplete extraction;
    the pre-request time.sleep() call follows the original's pattern.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    geturls_text = geturls_opener.open(httpurl)
    log.info("Downloading URL "+httpurl)
    # Transparently decompress compressed responses.  NOTE(review): raw
    # "deflate" data is not in gzip format, so GzipFile would not actually
    # decode it -- original behavior kept, flagged for follow-up.
    if geturls_text.info().get("Content-Encoding") in ("gzip", "deflate"):
        if sys.version_info[0] == 2:
            strbuf = StringIO(geturls_text.read())
        else:
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
    """Download *httpurl* into a uniquely-named temporary file using urllib.

    Returns a dict describing the file ('Filename', 'Filesize', ...), the
    response 'Headers'/'URL'/'Code' and the measured 'DownloadTime'.
    buffersize is the read-chunk size; sleep < 0 means use the module
    default geturls_download_sleep.

    NOTE(review): partially reconstructed from an incomplete extraction;
    the write/progress loop follows the visible fragments.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if sys.version_info[0] == 2:
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    else:
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    geturls_text = geturls_opener.open(httpurl)
    downloadsize = geturls_text.info().get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    else:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: durations were computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* with urllib and deliver it to its destination.

    With outfile "-" the bytes are returned in memory ('Content'); otherwise
    the temporary download is moved to outpath/outfile and the result
    describes that file.  buffersize is [download-chunk, copy-chunk];
    sleep < 0 means use the module default.  Returns False when the
    destination path is unusable.

    NOTE(review): partially reconstructed from an incomplete extraction; the
    guard returns and copy loop follow the visible fragments.  The original's
    two identical outfile=="-" branches (StringIO on Python 2, BytesIO on
    Python 3) are merged into one.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations were computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    else:
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if sys.version_info[0] == 2:
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if haverequests:
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch *httpurl* with requests; same result shape as the urllib
        variant ('Content', 'Headers', 'URL', 'Code')."""
        global geturls_download_sleep
        if sleep < 0:
            sleep = geturls_download_sleep
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        time.sleep(sleep)
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
        log.info("Downloading URL "+httpurl)
        # NOTE(review): the original checks Content-Type here (the urllib
        # variant checks Content-Encoding) -- kept as-is, but this looks
        # like it should match the urllib variant; confirm upstream.
        if geturls_text.headers.get('Content-Type') in ("gzip", "deflate"):
            if sys.version_info[0] == 2:
                strbuf = StringIO(geturls_text.content)
            else:
                strbuf = BytesIO(geturls_text.content)
            gzstrbuf = gzip.GzipFile(fileobj=strbuf)
            # BUG FIX: GzipFile has no .content attribute; read() the
            # decompressed bytes instead.
            returnval_content = gzstrbuf.read()[:]
        else:
            returnval_content = geturls_text.content[:]
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        geturls_text.close()
        return returnval
if not haverequests:
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback when requests is unavailable: delegate to urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
if haverequests:
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Stream *httpurl* into a uniquely-named temporary file using
        requests; same result shape as the urllib variant.

        NOTE(review): partially reconstructed from an incomplete extraction.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1")
        if sys.version_info[0] == 2:
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        else:
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if sleep < 0:
            sleep = geturls_download_sleep
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        time.sleep(sleep)
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        # BUG FIX: the original called int() on the raw header value before
        # its None check, so a missing Content-Length raised TypeError.
        downloadsize = geturls_text.headers.get('Content-Length')
        if downloadsize is not None:
            downloadsize = int(downloadsize)
        else:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        geturls_text.close()
        exec_time_end = time.time()
        # BUG FIX: durations were computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
if(haverequests==False):
 def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
  """Fallback used when the requests module is unavailable: delegate to the
  urllib implementation, which has the same interface and return value."""
  returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
  # FIX: propagate the delegate's result; the visible body only assigned it.
  return returnval;
if(haverequests==True):
 def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Download httpurl with requests. With a real outfile name the download
  is moved into outpath and a file-info dict is returned; with outfile="-"
  the whole body is returned in-memory in a content-info dict. Returns
  False when the destination path is unusable.
  buffersize is a two-element list: [download chunk, copy chunk]."""
  global geturls_download_sleep;
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(not outfile=="-"):
   outpath = outpath.rstrip(os.path.sep);
   filepath = os.path.realpath(outpath+os.path.sep+outfile);
   if(not os.path.exists(outpath)):
    os.makedirs(outpath);
   # NOTE(review): an unusable destination (outpath is a file, or filepath
   # is a directory) is treated as failure — confirm False is the intended
   # sentinel used by callers.
   if(os.path.exists(outpath) and os.path.isfile(outpath)):
    return False;
   if(os.path.exists(filepath) and os.path.isdir(filepath)):
    return False;
   pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   log.info("Moving file "+tmpfilename+" to "+filepath);
   exec_time_start = time.time();
   shutil.move(tmpfilename, filepath);
   exec_time_end = time.time();
   # FIX: elapsed time is end - start (was start - end -> negative).
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
   if(os.path.exists(tmpfilename)==True):
    os.remove(tmpfilename);
   returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  if(outfile=="-"):
   # The py2 and py3 branches were identical apart from the buffer class;
   # merged here with a single version switch.
   pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   # FIX: copy-progress counters must start at zero.
   fulldatasize = 0;
   prevdownsize = 0;
   exec_time_start = time.time();
   if(sys.version[0]=="2"):
    f = StringIO();
   if(sys.version[0]>="3"):
    f = BytesIO();
   with open(tmpfilename, 'rb') as ft:
    while True:
     databytes = ft.read(buffersize[1]);
     if not databytes: break;
     datasize = len(databytes);
     fulldatasize = datasize + fulldatasize;
     # FIX: guard the percentage math against a zero file size.
     if(downloadsize>0):
      percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
      downloaddiff = fulldatasize - prevdownsize;
      log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
     prevdownsize = fulldatasize;
     f.write(databytes);
   fdata = f.getvalue();
   f.close();
   os.remove(tmpfilename);
   exec_time_end = time.time();
   # FIX: elapsed time is end - start (was start - end -> negative).
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
   returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  # FIX: the assembled result was never returned in the visible body.
  return returnval;
if(haverequests==False):
 def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Fallback used when the requests module is unavailable: delegate to the
  urllib implementation."""
  # FIX: the original delegated positionally as (..., buffersize, outfile,
  # outpath, sleep), which does not match this wrapper's own parameter
  # order; keyword arguments make the call order-independent.
  # NOTE(review): assumes the urllib implementation uses the same parameter
  # names — confirm against its definition.
  returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
  # FIX: propagate the delegate's result to the caller.
  return returnval;
if(havemechanize==True):
 def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
  """Fetch httpurl with a mechanize.Browser and return a dict of the form
  {'Type': "Content", 'Content': ..., 'Headers': ..., 'URL': ..., 'Code': ...},
  transparently decompressing gzip-encoded responses."""
  global geturls_download_sleep;
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  geturls_opener = mechanize.Browser();
  # mechanize expects headers as a list of (name, value) pairs.
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  geturls_opener.addheaders = httpheaders;
  geturls_opener.set_cookiejar(httpcookie);
  geturls_opener.set_handle_robots(False);
  geturls_text = geturls_opener.open(httpurl);
  log.info("Downloading URL "+httpurl);
  # Read the Content-Encoding header once instead of four times.
  httpencoding = geturls_text.info().get("Content-Encoding");
  if(httpencoding=="gzip" or httpencoding=="deflate"):
   # NOTE(review): a "deflate" body is not gzip-framed, so GzipFile will
   # most likely fail on it (zlib would be needed) — confirm with a real
   # deflate-encoded response before relying on this path.
   if(sys.version[0]=="2"):
    strbuf = StringIO(geturls_text.read());
   if(sys.version[0]>="3"):
    strbuf = BytesIO(geturls_text.read());
   gzstrbuf = gzip.GzipFile(fileobj=strbuf);
   returnval_content = gzstrbuf.read()[:];
  else:
   returnval_content = geturls_text.read()[:];
  returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code};
  geturls_text.close();
  # FIX: the assembled result was never returned in the visible body.
  return returnval;
if(havemechanize==False):
 def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
  """Fallback used when the mechanize module is unavailable: delegate to the
  urllib implementation, which has the same interface and return value."""
  returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
  # FIX: propagate the delegate's result; the visible body only assigned it.
  return returnval;
if(havemechanize==True):
 def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
  """Download httpurl with mechanize, streaming the body into a named
  temporary file (kept on disk: delete=False). Returns a dict with
  'Type', 'Filename', 'Filesize', 'FilesizeAlt', 'Headers', 'URL', 'Code'
  plus 'DownloadTime'/'DownloadTimeReadable'."""
  global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
  exec_time_start = time.time();
  # Unique temp-file suffix derived from the URL, buffer size and start time.
  myhash = hashlib.new("sha1");
  if(sys.version[0]=="2"):
   myhash.update(httpurl);
   myhash.update(str(buffersize));
   myhash.update(str(exec_time_start));
  if(sys.version[0]>="3"):
   # Python 3 hashes require bytes, not str.
   myhash.update(httpurl.encode('utf-8'));
   myhash.update(str(buffersize).encode('utf-8'));
   myhash.update(str(exec_time_start).encode('utf-8'));
  newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  geturls_opener = mechanize.Browser();
  # mechanize expects headers as a list of (name, value) pairs.
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  geturls_opener.addheaders = httpheaders;
  geturls_opener.set_cookiejar(httpcookie);
  geturls_opener.set_handle_robots(False);
  geturls_text = geturls_opener.open(httpurl);
  # FIX: test for a missing Content-Length header *before* int(); the
  # original order int(info().get(...)) raises TypeError on None.
  downloadsize = geturls_text.info().get('Content-Length');
  if(downloadsize is not None):
   downloadsize = int(downloadsize);
  if downloadsize is None: downloadsize = 0;
  # FIX: progress counters must start at zero before the read loop.
  fulldatasize = 0;
  prevdownsize = 0;
  log.info("Downloading URL "+httpurl);
  with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
   tmpfilename = f.name;
   returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code};
   while True:
    databytes = geturls_text.read(buffersize);
    if not databytes: break;
    datasize = len(databytes);
    fulldatasize = datasize + fulldatasize;
    # FIX: only report percentages when the server sent a usable
    # Content-Length; otherwise the division raises ZeroDivisionError.
    if(downloadsize>0):
     percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
     downloaddiff = fulldatasize - prevdownsize;
     log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
    prevdownsize = fulldatasize;
    # FIX: each chunk must actually be written to the temp file.
    f.write(databytes);
  geturls_text.close();
  exec_time_end = time.time();
  # FIX: elapsed time is end - start (was start - end -> negative).
  log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
  returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
  return returnval;
if(havemechanize==False):
 def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
  """Fallback used when the mechanize module is unavailable: delegate to the
  urllib implementation, which has the same interface and return value."""
  returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
  # FIX: propagate the delegate's result; the visible body only assigned it.
  return returnval;
if(havemechanize==True):
 def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Download httpurl with mechanize. With a real outfile name the download
  is moved into outpath and a file-info dict is returned; with outfile="-"
  the whole body is returned in-memory in a content-info dict. Returns
  False when the destination path is unusable.
  buffersize is a two-element list: [download chunk, copy chunk]."""
  global geturls_download_sleep;
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(not outfile=="-"):
   outpath = outpath.rstrip(os.path.sep);
   filepath = os.path.realpath(outpath+os.path.sep+outfile);
   if(not os.path.exists(outpath)):
    os.makedirs(outpath);
   # NOTE(review): an unusable destination (outpath is a file, or filepath
   # is a directory) is treated as failure — confirm False is the intended
   # sentinel used by callers.
   if(os.path.exists(outpath) and os.path.isfile(outpath)):
    return False;
   if(os.path.exists(filepath) and os.path.isdir(filepath)):
    return False;
   pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   log.info("Moving file "+tmpfilename+" to "+filepath);
   exec_time_start = time.time();
   shutil.move(tmpfilename, filepath);
   exec_time_end = time.time();
   # FIX: the "to move file" log originally ran BEFORE shutil.move and
   # before exec_time_end existed (NameError); it now runs after the move,
   # and the duration is end - start instead of the negative start - end.
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
   if(os.path.exists(tmpfilename)==True):
    os.remove(tmpfilename);
   returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  if(outfile=="-"):
   # The py2 and py3 branches were identical apart from the buffer class;
   # merged here with a single version switch.
   pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   # FIX: copy-progress counters must start at zero.
   fulldatasize = 0;
   prevdownsize = 0;
   exec_time_start = time.time();
   if(sys.version[0]=="2"):
    f = StringIO();
   if(sys.version[0]>="3"):
    f = BytesIO();
   with open(tmpfilename, 'rb') as ft:
    while True:
     databytes = ft.read(buffersize[1]);
     if not databytes: break;
     datasize = len(databytes);
     fulldatasize = datasize + fulldatasize;
     # FIX: guard the percentage math against a zero file size.
     if(downloadsize>0):
      percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
      downloaddiff = fulldatasize - prevdownsize;
      log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
     prevdownsize = fulldatasize;
     f.write(databytes);
   fdata = f.getvalue();
   f.close();
   os.remove(tmpfilename);
   exec_time_end = time.time();
   # FIX: elapsed time is end - start (was start - end -> negative).
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
   returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  # FIX: the assembled result was never returned in the visible body.
  return returnval;
if(havemechanize==False):
 def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Fallback used when the mechanize module is unavailable: delegate to the
  urllib implementation."""
  # FIX: the original delegated positionally as (..., buffersize, outfile,
  # outpath, sleep), which does not match this wrapper's own parameter
  # order; keyword arguments make the call order-independent.
  # NOTE(review): assumes the urllib implementation uses the same parameter
  # names — confirm against its definition.
  returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
  # FIX: propagate the delegate's result to the caller.
  return returnval;