Update pywwwget.py
[PyWWW-Get.git] / pywwwget.py
blobe1f567e0c76ffa36657113c5701870f165ceed5b
1 #!/usr/bin/env python
3 '''
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2016 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2016 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: pywwwget.py - Last Update: 6/17/2016 Ver. 0.4.7 RC 1 - Author: cooldude2k $
17 '''
19 from __future__ import division, absolute_import, print_function;
20 import re, os, sys, hashlib, shutil, platform, tempfile, urllib, gzip, time, argparse, cgi, subprocess, imp;
21 import logging as log;
22 haverequests = False;
23 try:
24 imp.find_module('requests');
25 haverequests = True;
26 import requests;
27 except ImportError:
28 haverequests = False;
29 havemechanize = False;
30 try:
31 imp.find_module('mechanize');
32 havemechanize = True;
33 import mechanize;
34 except ImportError:
35 havemechanize = False;
36 if(sys.version[0]=="2"):
37 try:
38 from cStringIO import StringIO;
39 except ImportError:
40 from StringIO import StringIO;
41 # From http://python-future.org/compatible_idioms.html
42 from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
43 from urllib import urlencode;
44 from urllib2 import urlopen, Request, HTTPError;
45 import urllib2, urlparse, cookielib;
46 if(sys.version[0]>="3"):
47 from io import StringIO, BytesIO;
48 # From http://python-future.org/compatible_idioms.html
49 from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
50 from urllib.request import urlopen, Request;
51 from urllib.error import HTTPError;
52 import urllib.request as urllib2;
53 import urllib.parse as urlparse;
54 import http.cookiejar as cookielib;
# ---------------------------------------------------------------------------
# Program identity and version metadata.
# ---------------------------------------------------------------------------
__program_name__ = "PyWWW-Get";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
# (major, minor, micro, release-tag, revision)
__version_info__ = (0, 4, 7, "RC 1", 1);
# (year, month, day, release-tag, revision)
__version_date_info__ = (2016, 6, 17, "RC 1", 1);
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
# NOTE(review): the guard tests __version_info__[4] but the value appended
# comes from __version_date_info__[4]; both happen to be 1 here — confirm
# which tuple is actually intended.
if(__version_info__[4]!=None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4]==None):
    __version_date_plusrc__ = __version_date__;
if(__version_info__[3]!=None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3]==None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);

# Prefix/suffix used when creating download temporary files in pytempdir.
tmpfileprefix = "py"+str(sys.version_info[0])+"wwwget"+str(__version_info__[0])+"-";
tmpfilesuffix = "-";
pytempdir = tempfile.gettempdir();

# Shared cookie jar used by the urllib-based download helpers.
geturls_cj = cookielib.CookieJar();
# Canned User-Agent strings for impersonating common browsers.
geturls_ua_firefox_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3";
geturls_ua_chrome_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36";
geturls_ua_midori_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/538.15 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/538.15 Midori/0.5";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3";
geturls_ua_opera_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.54";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36 Vivaldi/1.2.490.43";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134";
# Self-identifying User-Agent strings built from the project metadata above.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
if(platform.python_implementation()!=""):
    geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=platform.python_implementation(), pyver=platform.python_version(), proname=__project__, prover=__version__);
if(platform.python_implementation()==""):
    # Fall back to a generic "Python" implementation name.
    geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp="Python", pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used by the header dicts below.
geturls_ua = geturls_ua_firefox_windows7;
# Pre-built request-header dicts, one per impersonated browser.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_midori_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_midori_windows7, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide defaults: headers used when a caller passes none, and the
# delay (seconds) slept before each download request.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def add_url_param(url, **params):
    """Return *url* with *params* merged into its query string.

    Existing query keys are preserved; a key also present in *params*
    is overridden by the new value.
    """
    queryindex = 3;  # index of the query component in urlsplit()'s 5-tuple
    parts = list(urlparse.urlsplit(url));
    # BUG FIX: cgi.parse_qsl was deprecated since Python 2.6 and removed in
    # Python 3.8; urlparse.parse_qsl exists in both the Python 2 urlparse
    # module and urllib.parse.  (Use urlparse.parse_qs for list values.)
    querydict = dict(urlparse.parse_qsl(parts[queryindex]));
    querydict.update(params);
    parts[queryindex] = urlencode(querydict);
    return urlparse.urlunsplit(parts);
118 os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search the directories in os.environ["PATH"] for *execfile* and
    return its full path, or None when it is not found.

    Bug fix: the original split PATH on ":" and joined with "/", which is
    POSIX-only; os.pathsep and os.path.join make the lookup portable
    (PATH entries on Windows are ";"-separated).
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    return None;
def listize(varlist):
    """Build 1-based forward (index -> value) and reverse (value -> index)
    lookup dicts for *varlist*.

    Returns {1: forward, 2: reverse, 'reg': forward, 'rev': reverse}.
    """
    forward = {}
    reverse = {}
    for position, value in enumerate(varlist, 1):
        forward[position] = value
        reverse[value] = position
    return {1: forward, 2: reverse, 'reg': forward, 'rev': reverse}
def twolistize(varlist):
    """Build forward/reverse lookup tables for a list of (name, desc) pairs.

    Both fields are whitespace-stripped and indexed from 1.  Returns
    {1: name-tables, 2: desc-tables, 'name': ..., 'desc': ...}, where each
    table set is {1/'reg': index -> value, 2/'rev': value -> index}.
    """
    names_fwd = {}
    names_rev = {}
    descs_fwd = {}
    descs_rev = {}
    for position, entry in enumerate(varlist, 1):
        name = entry[0].strip()
        desc = entry[1].strip()
        names_fwd[position] = name
        names_rev[name] = position
        descs_fwd[position] = desc
        descs_rev[desc] = position
    nametables = {1: names_fwd, 2: names_rev, 'reg': names_fwd, 'rev': names_rev}
    desctables = {1: descs_fwd, 2: descs_rev, 'reg': descs_fwd, 'rev': descs_rev}
    return {1: nametables, 2: desctables, 'name': nametables, 'desc': desctables}
def arglistize(proexec, *varlist):
    """Flatten (option, value) pairs into an argv-style list headed by
    *proexec*; a None in either slot of a pair is skipped."""
    newarglist = [proexec]
    for pair in varlist:
        for part in (pair[0], pair[1]):
            if part is not None:
                newarglist.append(part)
    return newarglist
173 # hms_string by ArcGIS Python Recipes
174 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a second count as H:MM:SS.ss.

    Based on the hms_string recipe by ArcGIS Python Recipes
    (https://arcpy.wordpress.com/2012/04/20/146/).
    """
    # int() truncation (not floor division) is deliberate here so the
    # result matches the original for negative inputs as well.
    whole_hours = int(sec_elapsed / (60 * 60))
    whole_minutes = int((sec_elapsed % (60 * 60)) / 60)
    leftover_seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(whole_hours, whole_minutes, leftover_seconds)
181 # get_readable_size by Lipis
182 # http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size.

    bytes: the size to format (parameter name kept for backward
           compatibility even though it shadows the builtin).
    precision: decimal places before trailing-zero stripping.
    unit: "IEC" (1024-based KiB/MiB/...) or "SI" (1000-based kB/MB/...);
          anything else falls back to "IEC".

    Returns {'Bytes': original count, 'ReadableWithSuffix': "1.5 KiB",
    'ReadableWithoutSuffix': "1.5", 'ReadableSuffix': "KiB"}.

    Based on get_readable_size by Lipis
    (http://stackoverflow.com/posts/14998888/revisions).

    Cleanups: removed the unused unitswos lists and the dead initial
    return_val = {}; the loop variable no longer shadows the unit
    parameter.
    """
    unit = unit.upper();
    if(unit!="IEC" and unit!="SI"):
        unit = "IEC";
    if(unit=="IEC"):
        units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
        unitsize = 1024.0;
    if(unit=="SI"):
        units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
        unitsize = 1000.0;
    orgbytes = bytes;
    for suffix in units:
        if abs(bytes) < unitsize:
            strformat = "%3."+str(precision)+"f%s";
            pre_return_val = (strformat % (bytes, suffix));
            # Strip trailing zeros and a dangling decimal point before the
            # suffix ("1.50 KiB" -> "1.5 KiB", "512.0 B" -> "512 B").
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
            alt_return_val = pre_return_val.split();
            return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
        bytes /= unitsize;
    # Larger than the biggest listed unit: report in YiB.
    strformat = "%."+str(precision)+"f%s";
    pre_return_val = (strformat % (bytes, "YiB"));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for a file, optionally with digests.

    infile: path to the file to inspect.
    precision/unit: forwarded to get_readable_size().
    usehashes: when True, add one hex digest per algorithm named in
               usehashtypes (comma-separated, e.g. "md5,sha1"); each digest
               is stored under the upper-cased algorithm name.

    Improvements: the file is read in fixed-size chunks instead of loading
    the whole file into memory, and it is opened with a context manager so
    it is closed even on error.
    """
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes==True):
        # One hasher per requested algorithm, all fed from a single pass.
        hashers = [];
        for hashtype in usehashtypes.split(","):
            hashtypeup = hashtype.strip().upper();
            hashers.append((hashtypeup, hashlib.new(hashtypeup)));
        with open(infile, "rb") as openfile:
            while True:
                chunk = openfile.read(65536);
                if not chunk:
                    break;
                for hashname, hasher in hashers:
                    hasher.update(chunk);
        for hashname, hasher in hashers:
            return_val.update({hashname: hasher.hexdigest()});
    return return_val;
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for a string, optionally with digests.

    instring: str or bytes whose length is measured.
    precision/unit: forwarded to get_readable_size().
    usehashes: when True, add one hex digest per algorithm named in
               usehashtypes (comma-separated), keyed by the upper-cased
               algorithm name.

    Improvement: instead of branching on sys.version, the input is hashed
    directly when it is already bytes and UTF-8 encoded otherwise, which
    behaves the same on Python 2 (str is bytes) and Python 3.
    """
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    getfilesize = len(instring);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes==True):
        if(isinstance(instring, bytes)):
            hashdata = instring;
        else:
            hashdata = instring.encode('utf-8');
        for hashtype in usehashtypes.split(","):
            hashtypeup = hashtype.strip().upper();
            filehash = hashlib.new(hashtypeup);
            filehash.update(hashdata);
            return_val.update({hashtypeup: filehash.hexdigest()});
    return return_val;
def make_http_headers_from_dict_to_list(headers=None):
    """Convert a {name: value} header dict to a [(name, value), ...] list.

    Lists pass through unchanged; any other type returns False.

    Bug fix: the default was a shared mutable dict (a caller mutating it
    would corrupt every later call); it is now rebuilt per call from a
    None sentinel.  dict.items() works on both Python 2 and 3, so the
    sys.version branching on iteritems()/items() is gone.
    """
    if(headers is None):
        headers = {'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
    if isinstance(headers, dict):
        returnval = list(headers.items());
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_dict_to_pycurl(headers=None):
    """Convert a {name: value} header dict to pycurl-style "Name: value"
    strings.

    Lists pass through unchanged; any other type returns False.

    Bug fix: the default was a shared mutable dict; it is now rebuilt per
    call from a None sentinel, and the Python-2/3 iteritems()/items()
    branching is collapsed into a single items() loop.
    """
    if(headers is None):
        headers = {'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
    if isinstance(headers, dict):
        returnval = [headkey+": "+headvalue for headkey, headvalue in headers.items()];
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_list_to_dict(headers=None):
    """Convert a [(name, value), ...] header list to a {name: value} dict.

    Dicts pass through unchanged; any other type returns False.

    Bug fix: the default was a shared mutable list; it is now rebuilt per
    call from a None sentinel, and the manual while-loop indexing is
    replaced by direct iteration.
    """
    if(headers is None):
        headers = [("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")];
    if isinstance(headers, list):
        returnval = {};
        for header in headers:
            returnval[header[0]] = header[1];
    elif isinstance(headers, dict):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def get_httplib_support(checkvalue=None):
    """Report the available HTTP backends.

    With checkvalue=None, return the list of backend names ("urllib" is
    always present; "requests"/"mechanize" when importable).  With a
    backend name, return True/False for its availability; "urllib1" and
    "urllib2" are accepted as aliases for "urllib".

    Cleanups: `not checkvalue==None` replaced by `is not None`, and the
    verbose membership if/else collapsed to an `in` expression.
    """
    global haverequests, havemechanize;
    returnval = ["urllib"];
    if(haverequests):
        returnval.append("requests");
    if(havemechanize):
        returnval.append("mechanize");
    if(checkvalue is not None):
        if(checkvalue=="urllib1" or checkvalue=="urllib2"):
            checkvalue = "urllib";
        returnval = checkvalue in returnval;
    return returnval;
def check_httplib_support(checkvalue="urllib"):
    """Return True/False for whether the named HTTP backend is available
    ("urllib1"/"urllib2" are treated as aliases of "urllib")."""
    normalized = "urllib" if checkvalue in ("urllib1", "urllib2") else checkvalue
    return get_httplib_support(normalized)
def get_httplib_support_list():
    """Return the list of all available HTTP backend names."""
    return get_httplib_support(None)
def download_from_url(httpurl, httpheaders, httpcookie, httplibuse="urllib", sleep=-1):
    """Dispatch a whole-body download to the chosen HTTP backend.

    httplibuse selects "urllib", "requests", or "mechanize"; the legacy
    names "urllib1"/"urllib2" and any unavailable backend fall back to
    "urllib".  sleep < 0 means use the module default
    geturls_download_sleep.  Returns the backend's result dict, or False
    for an unrecognized backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    delay = geturls_download_sleep if sleep < 0 else sleep
    backend = "urllib" if httplibuse in ("urllib1", "urllib2") else httplibuse
    if backend == "requests" and not haverequests:
        backend = "urllib"
    if backend == "mechanize" and not havemechanize:
        backend = "urllib"
    if backend == "urllib":
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, delay)
    if backend == "requests":
        return download_from_url_with_requests(httpurl, httpheaders, httpcookie, delay)
    if backend == "mechanize":
        return download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, delay)
    return False
def download_from_url_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", buffersize=524288, sleep=-1):
    """Dispatch a download-to-temporary-file request to the chosen backend.

    Same backend selection and fallback rules as download_from_url();
    buffersize is the read chunk size passed through to the backend.
    Returns the backend's result dict, or False for an unrecognized
    backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    delay = geturls_download_sleep if sleep < 0 else sleep
    backend = "urllib" if httplibuse in ("urllib1", "urllib2") else httplibuse
    if backend == "requests" and not haverequests:
        backend = "urllib"
    if backend == "mechanize" and not havemechanize:
        backend = "urllib"
    if backend == "urllib":
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, delay)
    if backend == "requests":
        return download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize, delay)
    if backend == "mechanize":
        return download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize, delay)
    return False
def download_from_url_to_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", outfile="-", outpath=None, buffersize=None, sleep=-1):
    """Dispatch a download-to-destination-file request to the chosen backend.

    outfile "-" asks the backend to return the content instead of writing
    a file.  Same backend selection and fallback rules as
    download_from_url().

    Bug fixes: outpath previously defaulted to os.getcwd() evaluated once
    at import time (stale if the process later changes directory), and
    buffersize defaulted to a shared mutable list; both are now resolved
    per call from None sentinels.
    """
    global geturls_download_sleep, haverequests, havemechanize;
    if(outpath is None):
        outpath = os.getcwd();
    if(buffersize is None):
        buffersize = [524288, 524288];
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(haverequests==False and httplibuse=="requests"):
        httplibuse = "urllib";
    if(havemechanize==False and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(httplibuse=="urllib"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep);
    elif(httplibuse=="requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep);
    else:
        returnval = False;
    return returnval;
def download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep=-1):
    """Fetch httpurl with urllib/urllib2 and return its body plus metadata.

    httpurl: URL to download.
    httpheaders: request headers as a dict (converted to a list here) or
                 a ready-made list of (name, value) pairs.
    httpcookie: cookielib/http.cookiejar CookieJar used for the request.
    sleep: seconds to wait before requesting; -1 means use the module
           default geturls_download_sleep.

    Returns {'Type': "Content", 'Content': body, 'Headers': response
    headers, 'URL': final URL after redirects, 'Code': HTTP status}.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    geturls_text = geturls_opener.open(httpurl);
    log.info("Downloading URL "+httpurl);
    # Decompress compressed responses via gzip.GzipFile.
    # NOTE(review): a true raw-"deflate" body is not in gzip format, so
    # GzipFile would fail on it — confirm whether deflate responses occur.
    if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:];
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
    """Download httpurl via urllib into a uniquely-named temporary file.

    Streams the response in buffersize-byte chunks, logging progress, and
    returns {'Type': "File", 'Filename': temp path, 'Filesize',
    'FilesizeAlt', 'Headers', 'URL', 'Code', 'DownloadTime',
    'DownloadTimeReadable'}.  The caller is responsible for removing or
    moving the temporary file (delete=False).

    Bug fix: elapsed time was computed as start - end, yielding negative
    durations in the log message and in 'DownloadTime'; it is now
    end - start.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    geturls_text = geturls_opener.open(httpurl);
    downloadsize = geturls_text.info().get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUG FIX: was exec_time_start - exec_time_end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile="-", outpath=None, buffersize=None, sleep=-1):
    """Download httpurl via urllib and place it at outpath/outfile.

    When outfile is "-" the downloaded bytes are returned in the result
    dict ('Type': "Content"); otherwise the temp download is moved into
    place and described as 'Type': "File".  Returns False when the
    destination path is invalid (outpath is an existing file, or the
    target is an existing directory).  buffersize is [download-chunk,
    copy-chunk].

    Bug fixes: outpath defaulted to os.getcwd() evaluated at import time
    and buffersize to a shared mutable list — both now resolved per call;
    all elapsed times were computed as start - end (negative) and are now
    end - start.
    """
    global geturls_download_sleep;
    if(outpath is None):
        outpath = os.getcwd();
    if(buffersize is None):
        buffersize = [524288, 524288];
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)==True):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]=="2"):
        # Python 2: buffer the content through a StringIO.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]>="3"):
        # Python 3: buffer the content through a BytesIO.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(haverequests==True):
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch httpurl with the requests library and return body + metadata.

        Returns {'Type': "Content", 'Content': body, 'Headers', 'URL',
        'Code'} like the urllib implementation.

        Bug fix: the gzip branch read gzstrbuf.content, but gzip.GzipFile
        has no .content attribute (it would raise AttributeError); it now
        calls .read(), matching download_from_url_with_urllib.
        """
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        time.sleep(sleep);
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie);
        log.info("Downloading URL "+httpurl);
        # NOTE(review): this inspects Content-Type, not Content-Encoding,
        # and requests already auto-decompresses gzip/deflate bodies, so
        # this branch only fires for unusual responses — confirm intent.
        if(geturls_text.headers.get('Content-Type')=="gzip" or geturls_text.headers.get('Content-Type')=="deflate"):
            if(sys.version[0]=="2"):
                strbuf = StringIO(geturls_text.content);
            if(sys.version[0]>="3"):
                strbuf = BytesIO(geturls_text.content);
            gzstrbuf = gzip.GzipFile(fileobj=strbuf);
            # BUG FIX: was gzstrbuf.content (no such attribute).
            returnval_content = gzstrbuf.read()[:];
        if(geturls_text.headers.get('Content-Type')!="gzip" and geturls_text.headers.get('Content-Type')!="deflate"):
            returnval_content = geturls_text.content[:];
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code};
        geturls_text.close();
        return returnval;
if(haverequests==False):
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback used when the requests module is not installed: delegate
        straight to the urllib-based implementation with the same arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
if(haverequests==True):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Download httpurl with requests (streaming) into a uniquely-named
        temporary file and return its metadata.

        Parameters:
          httpurl     -- URL to fetch.
          httpheaders -- request headers; a list is converted to a dict first.
          httpcookie  -- cookies passed through to requests.get().
          buffersize  -- chunk size in bytes for the streamed download.
          sleep       -- seconds to pause before the request; a negative value
                         selects the module default geturls_download_sleep.

        Returns a dict with 'Type': "File", the temp 'Filename', sizes,
        response headers, final URL, status code and download timing.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffer size + start time into a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        time.sleep(sleep);
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
        # BUGFIX: int(headers.get('Content-Length')) raised TypeError when the
        # header was absent; convert only after the None check.
        downloadsize = geturls_text.headers.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code};
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.close();
        geturls_text.close();
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (the old order produced negative durations).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(haverequests==False):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Fallback used when the requests module is not installed: delegate
        straight to the urllib-based implementation with the same arguments."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
if(haverequests==True):
    def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Download httpurl with requests, writing the result either to
        outpath/outfile or, when outfile is "-", back into memory.

        Parameters:
          httpurl     -- URL to fetch.
          httpheaders -- request headers, forwarded to the file downloader.
          httpcookie  -- cookies, forwarded to the file downloader.
          outfile     -- target file name, or "-" to return the content.
          outpath     -- target directory (created if missing).
          buffersize  -- [download chunk size, copy-back chunk size] in bytes.
          sleep       -- seconds to pause before the request; a negative value
                         selects the module default geturls_download_sleep.

        Returns a metadata dict, or False when outpath/outfile is unusable
        (outpath is an existing file, or the target path is a directory).
        """
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(not outfile=="-"):
            # Download to a temporary file, then move it into place.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was reversed, yielding negative values).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)==True):
                os.remove(tmpfilename);
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        if(outfile=="-"):
            # Stream the temporary file back into memory.  The former Python 2
            # and Python 3 branches differed only in the in-memory buffer type
            # (StringIO vs BytesIO), so they are merged here.
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                if(sys.version[0]=="2"):
                    f = StringIO();
                else:
                    f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was reversed, yielding negative values).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        return returnval;
if(haverequests==False):
    def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests module is not installed: delegate
        straight to the urllib-based implementation."""
        # BUGFIX: arguments were passed positionally as (buffersize, outfile,
        # outpath, sleep), shifting every value one slot against the urllib
        # signature (outfile, outpath, buffersize, sleep); keywords pin each
        # value to the right parameter.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
if(havemechanize==True):
    def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch httpurl through a mechanize.Browser and return its body in memory.

        Parameters:
          httpurl     -- URL to open.
          httpheaders -- request headers; a dict is converted to a header list.
          httpcookie  -- cookie jar installed into the browser.
          sleep       -- seconds to pause before the request; a negative value
                         selects the module default geturls_download_sleep.

        Returns a dict: {'Type': "Content", 'Content', 'Headers', 'URL', 'Code'}.
        """
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        browser = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        browser.addheaders = httpheaders;
        browser.set_cookiejar(httpcookie);
        browser.set_handle_robots(False);
        response = browser.open(httpurl);
        log.info("Downloading URL "+httpurl);
        encoding = response.info().get("Content-Encoding");
        if(encoding=="gzip" or encoding=="deflate"):
            # Compressed payload: buffer the raw bytes, then gunzip them.
            if(sys.version[0]=="2"):
                rawbuf = StringIO(response.read());
            else:
                rawbuf = BytesIO(response.read());
            body = gzip.GzipFile(fileobj=rawbuf).read()[:];
        else:
            body = response.read()[:];
        result = {'Type': "Content", 'Content': body, 'Headers': dict(response.info()), 'URL': response.geturl(), 'Code': response.code};
        response.close();
        return result;
if(havemechanize==False):
    def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback used when the mechanize module is not installed: delegate
        straight to the urllib-based implementation with the same arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
if(havemechanize==True):
    def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Download httpurl through mechanize into a uniquely-named temporary
        file and return its metadata.

        Parameters:
          httpurl     -- URL to open.
          httpheaders -- request headers; a dict is converted to a header list.
          httpcookie  -- cookie jar installed into the browser.
          buffersize  -- read chunk size in bytes.
          sleep       -- seconds to pause before the request; a negative value
                         selects the module default geturls_download_sleep.

        Returns a dict with 'Type': "File", the temp 'Filename', sizes,
        response headers, final URL, status code and download timing.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffer size + start time into a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        geturls_text = geturls_opener.open(httpurl);
        # BUGFIX: int(info().get('Content-Length')) raised TypeError when the
        # header was absent; convert only after the None check.
        downloadsize = geturls_text.info().get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code};
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.close();
        geturls_text.close();
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (the old order produced negative durations).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(havemechanize==False):
    def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Fallback used when the mechanize module is not installed: delegate
        straight to the urllib-based implementation with the same arguments."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
if(havemechanize==True):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Download httpurl through mechanize, writing the result either to
        outpath/outfile or, when outfile is "-", back into memory.

        Parameters:
          httpurl     -- URL to open.
          httpheaders -- request headers, forwarded to the file downloader.
          httpcookie  -- cookie jar, forwarded to the file downloader.
          outfile     -- target file name, or "-" to return the content.
          outpath     -- target directory (created if missing).
          buffersize  -- [download chunk size, copy-back chunk size] in bytes.
          sleep       -- seconds to pause before the request; a negative value
                         selects the module default geturls_download_sleep.

        Returns a metadata dict, or False when outpath/outfile is unusable
        (outpath is an existing file, or the target path is a directory).
        """
        global geturls_download_sleep;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(not outfile=="-"):
            # Download to a temporary file, then move it into place.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # BUGFIX: the old code logged the move time *before* moving and
            # referenced exec_time_end before it was ever assigned (NameError);
            # take the end timestamp after the move, as the requests twin does,
            # and compute the duration as end - start.
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)==True):
                os.remove(tmpfilename);
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        if(outfile=="-"):
            # Stream the temporary file back into memory.  The former Python 2
            # and Python 3 branches differed only in the in-memory buffer type
            # (StringIO vs BytesIO), so they are merged here.
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
            tmpfilename = pretmpfilename['Filename'];
            downloadsize = os.path.getsize(tmpfilename);
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                if(sys.version[0]=="2"):
                    f = StringIO();
                else:
                    f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was reversed, yielding negative values).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
        return returnval;
if(havemechanize==False):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the mechanize module is not installed: delegate
        straight to the urllib-based implementation."""
        # BUGFIX: arguments were passed positionally as (buffersize, outfile,
        # outpath, sleep), shifting every value one slot against the urllib
        # signature (outfile, outpath, buffersize, sleep); keywords pin each
        # value to the right parameter.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);