Update pywwwget.py
[PyWWW-Get.git] / pywwwget.py
blob5eda40a8c878927611398dd2884157ea67cc937c
1 #!/usr/bin/env python
3 '''
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2016 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2016 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: pywwwget.py - Last Update: 6/17/2016 Ver. 0.4.7 RC 1 - Author: cooldude2k $
17 '''
19 from __future__ import division, absolute_import, print_function;
20 import re, os, sys, hashlib, shutil, platform, tempfile, urllib, gzip, time, argparse, cgi, subprocess, imp;
21 import logging as log;
22 haverequests = False;
23 try:
24 imp.find_module('requests');
25 haverequests = True;
26 import requests;
27 except ImportError:
28 haverequests = False;
29 havemechanize = False;
30 try:
31 imp.find_module('mechanize');
32 havemechanize = True;
33 import mechanize;
34 except ImportError:
35 havemechanize = False;
36 if(sys.version[0]=="2"):
37 try:
38 from cStringIO import StringIO;
39 except ImportError:
40 from StringIO import StringIO;
41 # From http://python-future.org/compatible_idioms.html
42 from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
43 from urllib import urlencode;
44 from urllib2 import urlopen, Request, HTTPError;
45 import urllib2, urlparse, cookielib;
46 if(sys.version[0]>="3"):
47 from io import StringIO, BytesIO;
48 # From http://python-future.org/compatible_idioms.html
49 from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
50 from urllib.request import urlopen, Request;
51 from urllib.error import HTTPError;
52 import urllib.request as urllib2;
53 import urllib.parse as urlparse;
54 import http.cookiejar as cookielib;
# Project identity / version metadata.
__program_name__ = "PyWWW-Get";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
# (major, minor, micro, release-tag, rc-number); rc-number/tag of None means final release.
__version_info__ = (0, 4, 7, "RC 1", 1);
__version_date_info__ = (2016, 6, 17, "RC 1", 1);
# "YYYY.MM.DD" with zero-padded month and day.
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
# Append "-<rc-number>" to the dated version for release candidates.
if(__version_info__[4]!=None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4]==None):
    __version_date_plusrc__ = __version_date__;
# Human-readable version string; includes the release tag when present.
if(__version_info__[3]!=None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3]==None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
# Temporary download files are named "py<pymajor>wwwget<major>-...-<sha1>".
tmpfileprefix = "py"+str(sys.version_info[0])+"wwwget"+str(__version_info__[0])+"-";
tmpfilesuffix = "-";
pytempdir = tempfile.gettempdir();
# Shared cookie jar used by the urllib-based download helpers.
geturls_cj = cookielib.CookieJar();
# Canned User-Agent strings impersonating common browsers (Windows 7 era).
geturls_ua_firefox_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3";
geturls_ua_chrome_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3";
geturls_ua_opera_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.54";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.52";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134";
# Honest User-Agent identifying this tool itself.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Alternate UA advertising the Python implementation; falls back to plain
# "Python" when platform.python_implementation() reports an empty string.
if(platform.python_implementation()!=""):
    geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=platform.python_implementation(), pyver=platform.python_version(), proname=__project__, prover=__version__);
if(platform.python_implementation()==""):
    geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp="Python", pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default UA used by the module unless a header set overrides it.
geturls_ua = geturls_ua_firefox_windows7;
# Matching full header sets, one per impersonated browser.
geturls_headers_firefox_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chromium_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_palemoon_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_vivaldi_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_internet_explorer_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://motherless.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Self-identifying/bot header sets request an uncompressed body ("none").
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide defaults: header set and pre-download sleep (seconds).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def add_url_param(url, **params):
    """Return *url* with the given query parameters added (or replaced).

    Existing query parameters are preserved; any keyword whose name matches
    an existing parameter overrides it.
    """
    QUERY_PART = 3;  # index of the query component in urlsplit() results
    parts = list(urlparse.urlsplit(url));
    # urlparse.parse_qsl replaces the deprecated cgi.parse_qsl (the cgi module
    # was removed in modern Python); use parse_qs if list values are needed.
    querydict = dict(urlparse.parse_qsl(parts[QUERY_PART]));
    querydict.update(params);
    parts[QUERY_PART] = urlencode(querydict);
    return urlparse.urlunsplit(parts);
# Import-time side effect: extend PATH with this script's directory and the
# current working directory so sibling/bundled executables are discoverable.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile*; return its full path, or None.

    Bug fix: the PATH was previously split on ":" and joined with "/", which
    breaks on Windows; use os.pathsep / os.path.join instead.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    return None;  # explicit: not found anywhere on PATH
def listize(varlist):
    """Build 1-based index<->value lookup tables for a sequence.

    Returns {1: {index: value}, 2: {value: index}, 'reg': ..., 'rev': ...},
    where 'reg'/'rev' alias the same dicts as keys 1/2.
    """
    forward = {};
    backward = {};
    for position, value in enumerate(varlist, 1):
        forward[position] = value;
        backward[value] = position;
    return {1: forward, 2: backward, 'reg': forward, 'rev': backward};
def twolistize(varlist):
    """Like listize(), but for (name, desc) pairs; both fields are stripped.

    Returns {1: name-tables, 2: desc-tables, 'name': ..., 'desc': ...}, where
    each table set has the same {1,2,'reg','rev'} layout as listize().
    """
    namereg = {};
    namerev = {};
    descreg = {};
    descrev = {};
    for position, pair in enumerate(varlist, 1):
        name = pair[0].strip();
        desc = pair[1].strip();
        namereg[position] = name;
        namerev[name] = position;
        descreg[position] = desc;
        descrev[desc] = position;
    nametables = {1: namereg, 2: namerev, 'reg': namereg, 'rev': namerev};
    desctables = {1: descreg, 2: descrev, 'reg': descreg, 'rev': descrev};
    return {1: nametables, 2: desctables, 'name': nametables, 'desc': desctables};
def arglistize(proexec, *varlist):
    """Flatten (flag, value) pairs into an argv-style list headed by *proexec*.

    Each pair contributes its first element, then its second; None entries
    are skipped.
    """
    argv = [proexec];
    for pair in varlist:
        if pair[0] is not None:
            argv.append(pair[0]);
        if pair[1] is not None:
            argv.append(pair[1]);
    return argv;
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format an elapsed-seconds value as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / 3600);
    minutes = int((sec_elapsed % 3600) / 60);
    seconds = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds);
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size description.

    unit selects "IEC" (1024-based, KiB/MiB/...) or "SI" (1000-based, kB/MB/...);
    anything else falls back to IEC. Returns a dict with keys 'Bytes' (the
    original count), 'ReadableWithSuffix', 'ReadableWithoutSuffix' and
    'ReadableSuffix'.
    """
    unit = unit.upper();
    if(unit!="IEC" and unit!="SI"):
        unit = "IEC";  # default on unknown unit systems
    if(unit=="IEC"):
        units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
        lastsuffix = "YiB";
        unitsize = 1024.0;
    if(unit=="SI"):
        units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
        lastsuffix = "YB";  # bug fix: SI fallback previously used the IEC "YiB"
        unitsize = 1000.0;
    orgbytes = bytes;
    # Loop variable renamed from "unit" — it used to shadow the parameter.
    for suffix in units:
        if abs(bytes) < unitsize:
            strformat = "%3."+str(precision)+"f%s";
            pre_return_val = (strformat % (bytes, suffix));
            # Strip trailing zero decimals and a dangling "." before the suffix.
            # NOTE(review): with precision=0 this can also strip significant
            # trailing zeros (e.g. "100 KiB" -> "1 KiB") — behavior kept as-is.
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
            alt_return_val = pre_return_val.split();
            return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
        bytes /= unitsize;
    # Fell off the table: report in the largest (yotta) unit.
    strformat = "%."+str(precision)+"f%s";
    pre_return_val = (strformat % (bytes, lastsuffix));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for a file on disk.

    When usehashes is True, the file contents are hashed with every algorithm
    named in the comma-separated *usehashtypes* and the hex digests are added
    under uppercase keys (e.g. 'MD5', 'SHA1').
    """
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes==True):
        # with-statement guarantees the handle is closed even if read() fails
        # (the original open/read/close leaked the handle on error).
        with open(infile, "rb") as openfile:
            filecontents = openfile.read();
        for hashtype in usehashtypes.split(","):
            hashtypeup = hashtype.strip().upper();
            filehash = hashlib.new(hashtypeup);
            filehash.update(filecontents);
            return_val.update({hashtypeup: filehash.hexdigest()});
    return return_val;
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for a string's length.

    When usehashes is True, hex digests of the string are added under
    uppercase keys for every algorithm in the comma-separated *usehashtypes*.
    """
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    return_val = get_readable_size(len(instring), precision, unit);
    if(usehashes==True):
        for hashtype in usehashtypes.split(","):
            hashtypeup = hashtype.strip().upper();
            strhash = hashlib.new(hashtypeup);
            # Python 3 hashers require bytes; Python 2 str hashes directly.
            if(sys.version[0]=="2"):
                strhash.update(instring);
            if(sys.version[0]>="3"):
                strhash.update(instring.encode('utf-8'));
            return_val.update({hashtypeup: strhash.hexdigest()});
    return return_val;
def make_http_headers_from_dict_to_list(headers=None):
    """Convert a {header: value} dict into a list of (header, value) tuples.

    Lists are passed through unchanged; any other type yields False. With no
    argument, a browser-like default header set (module user agent) is used.
    """
    if(headers is None):
        # Built per call to avoid the shared mutable-default-argument pitfall.
        headers = {'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
    if isinstance(headers, dict):
        # dict.items() works on both Python 2 and 3; no version branch needed.
        returnval = [(headkey, headvalue) for headkey, headvalue in headers.items()];
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_dict_to_pycurl(headers=None):
    """Convert a {header: value} dict into pycurl-style "Header: value" strings.

    Lists are passed through unchanged; any other type yields False. With no
    argument, a browser-like default header set (module user agent) is used.
    """
    if(headers is None):
        # Built per call to avoid the shared mutable-default-argument pitfall.
        headers = {'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
    if isinstance(headers, dict):
        # dict.items() works on both Python 2 and 3; no version branch needed.
        returnval = [headkey+": "+headvalue for headkey, headvalue in headers.items()];
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_list_to_dict(headers=None):
    """Convert a list of (header, value) pairs into a {header: value} dict.

    Dicts are passed through unchanged; any other type yields False. With no
    argument, a browser-like default header set (module user agent) is used.
    """
    if(headers is None):
        # Built per call to avoid the shared mutable-default-argument pitfall.
        headers = [("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
    if isinstance(headers, list):
        # Indexes [0]/[1] like the original, so longer tuples still work.
        returnval = {pair[0]: pair[1] for pair in headers};
    elif isinstance(headers, dict):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def get_httplib_support(checkvalue=None):
    """List the available HTTP backends, or test membership.

    With checkvalue=None, returns the list of backend names ("urllib" always,
    plus "requests"/"mechanize" when importable). Otherwise returns True/False
    for that backend; "urllib1"/"urllib2" are aliases of "urllib".
    """
    global haverequests, havemechanize;
    supported = ["urllib"];
    if(haverequests==True):
        supported.append("requests");
    if(havemechanize==True):
        supported.append("mechanize");
    if(checkvalue is None):
        return supported;
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    return checkvalue in supported;
def check_httplib_support(checkvalue="urllib"):
    """True if the named HTTP backend is available ("urllib1"/"urllib2" alias "urllib")."""
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    return get_httplib_support(checkvalue);
def get_httplib_support_list():
    """Return the list of available HTTP backend names."""
    return get_httplib_support(None);
def download_from_url(httpurl, httpheaders, httpcookie, httplibuse="urllib", sleep=-1):
    """Download a URL body via the chosen backend and return its result dict.

    httplibuse selects "urllib" (default), "requests" or "mechanize";
    unavailable backends and the "urllib1"/"urllib2" aliases fall back to
    urllib. A negative sleep uses the module-wide geturls_download_sleep.
    """
    global geturls_download_sleep, haverequests, havemechanize;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(haverequests==False and httplibuse=="requests"):
        httplibuse = "urllib";
    if(havemechanize==False and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(httplibuse=="requests"):
        return download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep);
    if(httplibuse=="mechanize"):
        return download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep);
    if(httplibuse=="urllib"):
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
    return False;  # unknown backend name
def download_from_url_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", buffersize=524288, sleep=-1):
    """Download a URL to a temporary file via the chosen backend.

    Same backend selection and fallback rules as download_from_url();
    buffersize is the per-read chunk size in bytes.
    """
    global geturls_download_sleep, haverequests, havemechanize;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(haverequests==False and httplibuse=="requests"):
        httplibuse = "urllib";
    if(havemechanize==False and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(httplibuse=="requests"):
        return download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize, sleep);
    if(httplibuse=="mechanize"):
        return download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize, sleep);
    if(httplibuse=="urllib"):
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
    return False;  # unknown backend name
def download_from_url_to_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=None, sleep=-1):
    """Download a URL to outpath/outfile (or to memory when outfile=="-").

    Same backend selection and fallback rules as download_from_url();
    buffersize is a [download, copy] chunk-size pair (None -> [524288, 524288]).
    NOTE(review): the outpath default is captured at import time (os.getcwd()
    evaluated at def) — confirm that is intended.
    """
    global geturls_download_sleep, haverequests, havemechanize;
    # Bug fix: the shared mutable list default ([524288, 524288]) is replaced
    # by a None sentinel; explicit list/tuple arguments behave as before.
    if(buffersize is None):
        buffersize = [524288, 524288];
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(haverequests==False and httplibuse=="requests"):
        httplibuse = "urllib";
    if(havemechanize==False and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(httplibuse=="requests"):
        return download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep);
    if(httplibuse=="mechanize"):
        return download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep);
    if(httplibuse=="urllib"):
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep);
    return False;  # unknown backend name
def download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep=-1):
    # Fetch httpurl with urllib2/urllib.request using the given cookie jar and
    # headers, honoring the module-wide pre-download sleep, and return a dict:
    # {'Type', 'Content', 'Headers', 'URL', 'Code'}.
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie));
    # Opener wants headers as a list of (name, value) tuples.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    geturls_text = geturls_opener.open(httpurl);
    log.info("Downloading URL "+httpurl);
    if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
        # Compressed body: buffer it (StringIO on Py2, BytesIO on Py3) and
        # decompress through GzipFile.
        # NOTE(review): GzipFile only understands gzip framing; a raw
        # "deflate" body would fail here — confirm servers in use.
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:];
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
    """Download httpurl (via urllib) into a uniquely named temporary file.

    Returns a dict with 'Filename', 'Filesize', 'FilesizeAlt', 'Headers',
    'URL', 'Code', 'DownloadTime' and 'DownloadTimeReadable'. The caller is
    responsible for removing the temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Temp-file suffix is a SHA-1 of URL+buffersize+start time so concurrent
    # downloads never collide.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    geturls_text = geturls_opener.open(httpurl);
    downloadsize = geturls_text.info().get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
        f.close();
    geturls_text.close();
    exec_time_end = time.time();
    # Bug fix: elapsed time is end - start (it was start - end, which produced
    # negative durations in the log and in 'DownloadTime').
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=None, sleep=-1):
    """Download httpurl (via urllib) and deliver it to its final destination.

    With a real outfile name the temp download is moved to outpath/outfile and
    a 'File' result dict is returned; with outfile=="-" the temp file is read
    back into memory and a 'Content' result dict is returned. Returns False
    when the destination path is unusable.
    NOTE(review): in "-" mode the copy duration is still reported under the
    'MoveFileTime' keys — kept for backward compatibility.
    """
    global geturls_download_sleep;
    # Bug fix: shared mutable list default replaced by a None sentinel.
    if(buffersize is None):
        buffersize = [524288, 524288];
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Write-to-disk mode: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # Bug fix: elapsed time is end - start (was reversed, giving negative
        # durations here and in 'MoveFileTime').
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)==True):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]=="2"):
        # Stream-to-memory mode (Python 2): copy the temp file into a StringIO.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # Bug fix: end - start (was reversed; see above).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]>="3"):
        # Stream-to-memory mode (Python 3): copy the temp file into a BytesIO.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # Bug fix: end - start (was reversed; see above).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(haverequests==True):
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch httpurl with the requests library; mirrors the return dict of
        download_from_url_with_urllib() ({'Type','Content','Headers','URL','Code'})."""
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        # requests wants headers as a dict.
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        time.sleep(sleep);
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie);
        log.info("Downloading URL "+httpurl);
        # NOTE(review): this inspects Content-Type, not Content-Encoding, so the
        # branch practically never fires — and requests already decompresses
        # gzip/deflate transparently into .content. Kept as-is to avoid a
        # behavior change; confirm before "fixing" the header name.
        if(geturls_text.headers.get('Content-Type')=="gzip" or geturls_text.headers.get('Content-Type')=="deflate"):
            if(sys.version[0]=="2"):
                strbuf = StringIO(geturls_text.content);
            if(sys.version[0]>="3"):
                strbuf = BytesIO(geturls_text.content);
            gzstrbuf = gzip.GzipFile(fileobj=strbuf);
            # Bug fix: GzipFile has no .content attribute (AttributeError);
            # read() returns the decompressed body, matching the urllib backend.
            returnval_content = gzstrbuf.read()[:];
        if(geturls_text.headers.get('Content-Type')!="gzip" and geturls_text.headers.get('Content-Type')!="deflate"):
            returnval_content = geturls_text.content[:];
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code};
        geturls_text.close();
        return returnval;
if(haverequests==False):
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback used when the requests module is unavailable: delegate
        the download to the urllib-based implementation unchanged."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
if(haverequests==True):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Stream httpurl into a uniquely-named temporary file using requests.

        Returns a dict of type "File" with the temp file name, its size,
        response headers, final URL, status code, and the elapsed download
        time. buffersize is the streaming chunk size in bytes; sleep<0 uses
        the module default geturls_download_sleep.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash the URL/chunk size/start time into the temp-file suffix so
        # concurrent downloads never collide on a name.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        time.sleep(sleep);
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
        # BUG FIX: the original called int() on the raw header value before
        # testing for None, so a missing Content-Length raised TypeError and
        # the None checks below were dead code.
        downloadsize = geturls_text.headers.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code};
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.close();
        geturls_text.close();
        exec_time_end = time.time();
        # BUG FIX: elapsed time is end - start; the original computed
        # start - end, which is always negative.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(haverequests==False):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Fallback used when the requests module is unavailable: delegate
        the temp-file download to the urllib-based implementation unchanged."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
if(haverequests==True):
    def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Download httpurl via requests and either move the result into
        outpath/outfile (dict of type "File") or, when outfile=="-", return
        the body in memory (dict of type "Content").

        Returns False when outpath exists as a file or the target path is a
        directory. buffersize is [download_chunk, copy_chunk]; sleep<0 uses
        the module default geturls_download_sleep.
        """
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(not outfile=="-"):
            # Save-to-disk path: download to a temp file, then move it into place.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            exec_time_end = time.time();
            # BUG FIX: elapsed time is end - start; the original computed
            # start - end, which is always negative.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)==True):
                os.remove(tmpfilename);
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        if(outfile=="-"):
            # In-memory path. The former Python-2 and Python-3 branches were
            # byte-identical apart from the buffer class, so they are merged:
            # only the StringIO/BytesIO choice depends on the interpreter.
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = StringIO() if sys.version[0]=="2" else BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUG FIX: end - start (was negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        return returnval;
if(haverequests==False):
    def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests module is unavailable: delegate
        to the urllib-based implementation."""
        # BUG FIX: the original passed (buffersize, outfile, outpath, sleep)
        # positionally, shifting buffersize into the outfile slot of the
        # urllib variant's (outfile, outpath, buffersize, sleep) signature;
        # pass by keyword so each argument lands on its parameter.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
if(havemechanize==True):
    def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch httpurl through a mechanize Browser and return a dict with
        'Type', 'Content' (gzip/deflate bodies are decompressed), 'Headers',
        'URL' and 'Code'. sleep<0 selects the module default delay."""
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        browser = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        browser.addheaders = httpheaders;
        browser.set_cookiejar(httpcookie);
        browser.set_handle_robots(False);
        response = browser.open(httpurl);
        log.info("Downloading URL "+httpurl);
        content_encoding = response.info().get("Content-Encoding");
        if(content_encoding=="gzip" or content_encoding=="deflate"):
            # Wrap the raw body in the interpreter-appropriate buffer type
            # and decompress it before returning.
            rawbuf = StringIO(response.read()) if sys.version[0]=="2" else BytesIO(response.read());
            returnval_content = gzip.GzipFile(fileobj=rawbuf).read()[:];
        else:
            returnval_content = response.read()[:];
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(response.info()), 'URL': response.geturl(), 'Code': response.code};
        response.close();
        return returnval;
if(havemechanize==False):
    def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback used when the mechanize module is unavailable: delegate
        the download to the urllib-based implementation unchanged."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
if(havemechanize==True):
    def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Stream httpurl into a uniquely-named temporary file via mechanize.

        Returns a dict of type "File" with the temp file name, its size,
        response headers, final URL, status code, and the elapsed download
        time. buffersize is the read chunk size in bytes; sleep<0 uses the
        module default geturls_download_sleep.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash the URL/chunk size/start time into the temp-file suffix so
        # concurrent downloads never collide on a name.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        geturls_text = geturls_opener.open(httpurl);
        # BUG FIX: the original called int() on the raw header value before
        # testing for None, so a missing Content-Length raised TypeError and
        # the None checks below were dead code.
        downloadsize = geturls_text.info().get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code};
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.close();
        geturls_text.close();
        exec_time_end = time.time();
        # BUG FIX: elapsed time is end - start; the original computed
        # start - end, which is always negative.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(havemechanize==False):
    def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Fallback used when the mechanize module is unavailable: delegate
        the temp-file download to the urllib-based implementation unchanged."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
if(havemechanize==True):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Download httpurl via mechanize and either move the result into
        outpath/outfile (dict of type "File") or, when outfile=="-", return
        the body in memory (dict of type "Content").

        Returns False when outpath exists as a file or the target path is a
        directory. buffersize is [download_chunk, copy_chunk]; sleep<0 uses
        the module default geturls_download_sleep.
        """
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(not outfile=="-"):
            # Save-to-disk path: download to a temp file, then move it into place.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            exec_time_end = time.time();
            # BUG FIX: elapsed time is end - start; the original computed
            # start - end, which is always negative.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)==True):
                os.remove(tmpfilename);
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        if(outfile=="-"):
            # In-memory path. The former Python-2 and Python-3 branches were
            # byte-identical apart from the buffer class, so they are merged:
            # only the StringIO/BytesIO choice depends on the interpreter.
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = StringIO() if sys.version[0]=="2" else BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUG FIX: end - start (was negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        return returnval;
if(havemechanize==False):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the mechanize module is unavailable: delegate
        to the urllib-based implementation."""
        # BUG FIX: the original passed (buffersize, outfile, outpath, sleep)
        # positionally, shifting buffersize into the outfile slot of the
        # urllib variant's (outfile, outpath, buffersize, sleep) signature;
        # pass by keyword so each argument lands on its parameter.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);