#!/usr/bin/env python

'''
This program is free software; you can redistribute it and/or modify
it under the terms of the Revised BSD License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Revised BSD License for more details.

Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

$FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
'''

from __future__ import division, absolute_import, print_function;
import re, os, sys, hashlib, shutil, platform, tempfile, urllib, zlib, time, argparse, cgi, subprocess, socket, email.utils, datetime;
import logging as log;
from ftplib import FTP, FTP_TLS;
from base64 import b64encode;
haverequests = False;
try:
    import requests;
    haverequests = True;
except ImportError:
    haverequests = False;
havemechanize = False;
try:
    import mechanize;
    havemechanize = True;
except ImportError:
    havemechanize = False;
havepycurl = False;
try:
    import pycurl;
    havepycurl = True;
except ImportError:
    havepycurl = False;
haveparamiko = False;
try:
    import paramiko;
    haveparamiko = True;
except ImportError:
    haveparamiko = False;
havepysftp = False;
try:
    import pysftp;
    havepysftp = True;
except ImportError:
    havepysftp = False;
haveurllib3 = False;
try:
    import urllib3;
    haveurllib3 = True;
except ImportError:
    haveurllib3 = False;
havehttplib2 = False;
try:
    import httplib2;
    from httplib2 import HTTPConnectionWithTimeout, HTTPSConnectionWithTimeout;
    havehttplib2 = True;
except ImportError:
    havehttplib2 = False;
havehttpx = False;
try:
    import httpx;
    havehttpx = True;
except ImportError:
    havehttpx = False;
havehttpcore = False;
try:
    import httpcore;
    havehttpcore = True;
except ImportError:
    havehttpcore = False;
havebrotli = False;
try:
    import brotli;
    havebrotli = True;
except ImportError:
    havebrotli = False;
havezstd = False;
try:
    import zstandard;
    havezstd = True;
except ImportError:
    havezstd = False;
if(sys.version[0]=="2"):
    try:
        from io import StringIO, BytesIO;
    except ImportError:
        try:
            from cStringIO import StringIO;
            from cStringIO import StringIO as BytesIO;
        except ImportError:
            from StringIO import StringIO;
            from StringIO import StringIO as BytesIO;
    # From http://python-future.org/compatible_idioms.html
    from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
    from urllib import urlencode;
    from urllib import urlopen as urlopenalt;
    from urllib2 import urlopen, Request, install_opener, HTTPError, URLError, build_opener, HTTPCookieProcessor;
    import urlparse, cookielib;
    from httplib import HTTPConnection, HTTPSConnection;
if(sys.version[0]>="3"):
    from io import StringIO, BytesIO;
    # From http://python-future.org/compatible_idioms.html
    from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
    from urllib.request import urlopen, Request, install_opener, build_opener, HTTPCookieProcessor;
    from urllib.error import HTTPError, URLError;
    import urllib.parse as urlparse;
    import http.cookiejar as cookielib;
    from http.client import HTTPConnection, HTTPSConnection;

__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
__version_info__ = (2, 0, 2, "RC 1", 1);
__version_date_info__ = (2023, 10, 5, "RC 1", 1);
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__;
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);

tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
tmpfilesuffix = "-";
pytempdir = tempfile.gettempdir();

PyBitness = platform.architecture()[0];
if(PyBitness=="32bit" or PyBitness=="32"):
    PyBitness = "32";
elif(PyBitness=="64bit" or PyBitness=="64"):
    PyBitness = "64";
else:
    PyBitness = "32";

compression_supported = "gzip, deflate";
if(havebrotli):
    compression_supported = "gzip, deflate, br";
else:
    compression_supported = "gzip, deflate";

geturls_cj = cookielib.CookieJar();
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
geturls_ua = geturls_ua_firefox_windows7;
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;

def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dbglevel=20):
    if(outtype=="print" and dbgenable):
        print(dbgtxt);
        return True;
    elif(outtype=="log" and dbgenable):
        log.info(dbgtxt);
        return True;
    elif(outtype=="warning" and dbgenable):
        log.warning(dbgtxt);
        return True;
    elif(outtype=="error" and dbgenable):
        log.error(dbgtxt);
        return True;
    elif(outtype=="critical" and dbgenable):
        log.critical(dbgtxt);
        return True;
    elif(outtype=="exception" and dbgenable):
        log.exception(dbgtxt);
        return True;
    elif(outtype=="logalt" and dbgenable):
        log.log(dbglevel, dbgtxt);
        return True;
    elif(outtype=="debug" and dbgenable):
        log.debug(dbgtxt);
        return True;
    elif(not dbgenable):
        return True;
    else:
        return False;

def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dbglevel=20):
    dbgout = verbose_printout(dbgtxt, outtype, dbgenable, dbglevel);
    if(not dbgout):
        return False;
    return dbgtxt;

def add_url_param(url, **params):
    n = 3;
    parts = list(urlparse.urlsplit(url));
    d = dict(urlparse.parse_qsl(parts[n]));  # use urlparse.parse_qs for list values
    d.update(params);
    parts[n] = urlencode(d);
    return urlparse.urlunsplit(parts);
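
# Example usage (an illustrative sketch, not part of the library API;
# example.com is a placeholder URL): add_url_param merges extra query
# parameters into an existing URL.
if(__name__ == "__main__"):
    example_url = add_url_param("http://example.com/search?q=test", page="2");
    print(example_url);  # http://example.com/search?q=test&page=2 (parameter order may vary)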

os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    for path in os.environ["PATH"].split(os.pathsep):
        if(os.path.exists(os.path.join(path, execfile))):
            return os.path.join(path, execfile);

def listize(varlist):
    il = 0;
    ix = len(varlist);
    ilx = 1;
    newlistreg = {};
    newlistrev = {};
    newlistfull = {};
    while(il < ix):
        newlistreg.update({ilx: varlist[il]});
        newlistrev.update({varlist[il]: ilx});
        ilx = ilx + 1;
        il = il + 1;
    newlistfull = {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev};
    return newlistfull;
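
# Example usage (illustrative sketch): listize builds forward ("reg") and
# reverse ("rev") lookup tables from a list, using 1-based indexes.
if(__name__ == "__main__"):
    example_list = listize(["alpha", "beta"]);
    print(example_list['reg']);  # {1: 'alpha', 2: 'beta'}
    print(example_list['rev']);  # {'alpha': 1, 'beta': 2}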

def twolistize(varlist):
    il = 0;
    ix = len(varlist);
    ilx = 1;
    newlistnamereg = {};
    newlistnamerev = {};
    newlistdescreg = {};
    newlistdescrev = {};
    newlistfull = {};
    while(il < ix):
        newlistnamereg.update({ilx: varlist[il][0].strip()});
        newlistnamerev.update({varlist[il][0].strip(): ilx});
        newlistdescreg.update({ilx: varlist[il][1].strip()});
        newlistdescrev.update({varlist[il][1].strip(): ilx});
        ilx = ilx + 1;
        il = il + 1;
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev};
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev};
    newlistfull = {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp};
    return newlistfull;

def arglistize(proexec, *varlist):
    il = 0;
    ix = len(varlist);
    ilx = 1;
    newarglist = [proexec];
    while(il < ix):
        if varlist[il][0] is not None:
            newarglist.append(varlist[il][0]);
        if varlist[il][1] is not None:
            newarglist.append(varlist[il][1]);
        il = il + 1;
    return newarglist;
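
# Example usage (illustrative sketch): arglistize flattens (option, value)
# pairs into an argv-style list, skipping None entries.
if(__name__ == "__main__"):
    example_args = arglistize("wget", ("-O", "output.bin"), ("--quiet", None));
    print(example_args);  # ['wget', '-O', 'output.bin', '--quiet']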

def fix_header_names(header_dict):
    if(sys.version[0]=="2"):
        header_dict = {k.title(): v for k, v in header_dict.iteritems()};
    if(sys.version[0]>="3"):
        header_dict = {k.title(): v for k, v in header_dict.items()};
    return header_dict;

# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60));
    m = int((sec_elapsed % (60 * 60)) / 60);
    s = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(h, m, s);
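
# Example usage (illustrative sketch): hms_string formats elapsed seconds as
# H:MM:SS.ss.
if(__name__ == "__main__"):
    print(hms_string(3661.5));  # 1:01:01.50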

# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    unit = unit.upper();
    if(unit!="IEC" and unit!="SI"):
        unit = "IEC";
    if(unit=="IEC"):
        units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
        unitswos = ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
        unitsize = 1024.0;
    if(unit=="SI"):
        units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
        unitswos = ["B","kB","MB","GB","TB","PB","EB","ZB"];
        unitsize = 1000.0;
    return_val = {};
    orgbytes = bytes;
    for unit in units:
        if abs(bytes) < unitsize:
            strformat = "%3."+str(precision)+"f%s";
            pre_return_val = (strformat % (bytes, unit));
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
            alt_return_val = pre_return_val.split();
            return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
            return return_val;
        bytes /= unitsize;
    strformat = "%."+str(precision)+"f%s";
    pre_return_val = (strformat % (bytes, "YiB"));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
    return return_val;
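
# Example usage (illustrative sketch): get_readable_size reports a byte count
# in IEC (1024-based) or SI (1000-based) units and strips trailing zeros.
if(__name__ == "__main__"):
    print(get_readable_size(2048, 2, "IEC")['ReadableWithSuffix']);  # 2 KiB
    print(get_readable_size(2048, 2, "SI")['ReadableWithSuffix']);  # 2.05 kB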

def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes):
        hashtypelist = usehashtypes.split(",");
        openfile = open(infile, "rb");
        filecontents = openfile.read();
        openfile.close();
        listnumcount = 0;
        listnumend = len(hashtypelist);
        while(listnumcount < listnumend):
            hashtypelistlow = hashtypelist[listnumcount].strip();
            hashtypelistup = hashtypelistlow.upper();
            filehash = hashlib.new(hashtypelistup);
            filehash.update(filecontents);
            filegethash = filehash.hexdigest();
            return_val.update({hashtypelistup: filegethash});
            listnumcount += 1;
    return return_val;

def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    getfilesize = len(instring);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes):
        hashtypelist = usehashtypes.split(",");
        listnumcount = 0;
        listnumend = len(hashtypelist);
        while(listnumcount < listnumend):
            hashtypelistlow = hashtypelist[listnumcount].strip();
            hashtypelistup = hashtypelistlow.upper();
            filehash = hashlib.new(hashtypelistup);
            if(sys.version[0]=="2"):
                filehash.update(instring);
            if(sys.version[0]>="3"):
                filehash.update(instring.encode('utf-8'));
            filegethash = filehash.hexdigest();
            return_val.update({hashtypelistup: filegethash});
            listnumcount += 1;
    return return_val;
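
# Example usage (illustrative sketch): the string variant also computes
# optional hash digests of the input.
if(__name__ == "__main__"):
    example_info = get_readable_size_from_string("hello", usehashes=True, usehashtypes="md5");
    print(example_info['ReadableWithSuffix']);  # 5 B
    print(example_info['MD5']);  # 5d41402abc4b2a76b9719d911017c592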

def http_status_to_reason(code):
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    };
    return reasons.get(code, 'Unknown Status Code');
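
# Example usage (illustrative sketch): mapping a numeric HTTP status code to
# its reason phrase, with a fallback for unknown codes.
if(__name__ == "__main__"):
    print(http_status_to_reason(404));  # Not Found
    print(http_status_to_reason(599));  # Unknown Status Code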

def ftp_status_to_reason(code):
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    };
    return reasons.get(code, 'Unknown Status Code');

def sftp_status_to_reason(code):
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    };
    return reasons.get(code, 'Unknown Status Code');

def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    if isinstance(headers, dict):
        returnval = [];
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append((headkey, headvalue));
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append((headkey, headvalue));
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;

def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    if isinstance(headers, dict):
        returnval = [];
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append(headkey+": "+headvalue);
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append(headkey+": "+headvalue);
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;

def make_http_headers_from_pycurl_to_dict(headers):
    header_dict = {};
    headers = headers.strip().split('\r\n');
    for header in headers:
        parts = header.split(': ', 1);
        if(len(parts) == 2):
            key, value = parts;
            header_dict[key.title()] = value;
    return header_dict;

def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    if isinstance(headers, list):
        returnval = {};
        mli = 0;
        mlil = len(headers);
        while(mli<mlil):
            returnval.update({headers[mli][0]: headers[mli][1]});
            mli = mli + 1;
    elif isinstance(headers, dict):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
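
# Example usage (illustrative sketch): round-tripping headers between the
# dict and list representations used by the different HTTP backends.
if(__name__ == "__main__"):
    example_hdr_list = make_http_headers_from_dict_to_list({'Accept': "*/*", 'Connection': "close"});
    print(example_hdr_list);  # [('Accept', '*/*'), ('Connection', 'close')]
    print(make_http_headers_from_list_to_dict(example_hdr_list));  # {'Accept': '*/*', 'Connection': 'close'}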

def get_httplib_support(checkvalue=None):
    global haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    returnval = [];
    returnval.append("ftp");
    returnval.append("httplib");
    if(havehttplib2):
        returnval.append("httplib2");
    returnval.append("urllib");
    if(haveurllib3):
        returnval.append("urllib3");
        returnval.append("request3");
    returnval.append("request");
    if(haverequests):
        returnval.append("requests");
    if(havehttpx):
        returnval.append("httpx");
        returnval.append("httpx2");
    if(havemechanize):
        returnval.append("mechanize");
    if(havepycurl):
        returnval.append("pycurl");
        returnval.append("pycurl2");
        returnval.append("pycurl3");
    if(haveparamiko):
        returnval.append("sftp");
    if(havepysftp):
        returnval.append("pysftp");
    if(not checkvalue is None):
        if(checkvalue=="urllib1" or checkvalue=="urllib2"):
            checkvalue = "urllib";
        if(checkvalue=="httplib1"):
            checkvalue = "httplib";
        if(checkvalue in returnval):
            returnval = True;
        else:
            returnval = False;
    return returnval;

def check_httplib_support(checkvalue="urllib"):
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    if(checkvalue=="httplib1"):
        checkvalue = "httplib";
    returnval = get_httplib_support(checkvalue);
    return returnval;

def get_httplib_support_list():
    returnval = get_httplib_support(None);
    return returnval;
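
# Example usage (illustrative sketch): querying which download backends are
# importable in the current environment; the exact list depends on which
# optional modules are installed.
if(__name__ == "__main__"):
    print(get_httplib_support_list());  # e.g. ['ftp', 'httplib', 'urllib', 'request', ...]
    print(check_httplib_support("urllib"));  # True (urllib is always available)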

def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        returnval = download_from_url_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    else:
        returnval = False;
    return returnval;
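
# Example usage (illustrative sketch; needs network access, and example.com
# is only a placeholder URL): download a page into memory with the urllib
# backend and inspect the result dictionary.
if(__name__ == "__main__"):
    example_result = download_from_url("http://example.com/", httplibuse="urllib");
    if(example_result):
        print(example_result['Code'], example_result['Reason']);  # e.g. 200 OK
        print(example_result['ContentsizeAlt']['SI']['ReadableWithSuffix']);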

def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        returnval = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    else:
        returnval = False;
    return returnval;

def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        returnval = download_from_url_to_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    else:
        returnval = False;
    return returnval;

def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update({'Authorization': "Basic "+inurlencode});
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        geturls_request = Request(httpurl);
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(geturls_request);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata);
        else:
            geturls_text = geturls_opener.open(geturls_request);
    except HTTPError as geturls_text_error:
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    try:
        httpcodereason = geturls_text.reason;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode());
    try:
        httpversionout = geturls_text.version;
    except AttributeError:
        httpversionout = "1.1";
    httpmethodout = geturls_request.get_method();
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    httpheaderout = fix_header_names(httpheaderout);
    if(sys.version[0]=="2"):
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if(downloadsize is None):
        downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if(not databytes):
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;

def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        f.write(pretmpfilename['Content']);
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
    except (AttributeError, TypeError):
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except (TypeError, ValueError):
            pass;
    except ValueError:
        pass;
    returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
1015 def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1016 global geturls_download_sleep;
1017 if(sleep<0):
1018 sleep = geturls_download_sleep;
1019 if(timeout<=0):
1020 timeout = 10;
1021 if(not outfile=="-"):
1022 outpath = outpath.rstrip(os.path.sep);
1023 filepath = os.path.realpath(outpath+os.path.sep+outfile);
1024 if(not os.path.exists(outpath)):
1025 os.makedirs(outpath);
1026 if(os.path.exists(outpath) and os.path.isfile(outpath)):
1027 return False;
1028 if(os.path.exists(filepath) and os.path.isdir(filepath)):
1029 return False;
1030 pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1031 if(not pretmpfilename):
1032 return False;
1033 tmpfilename = pretmpfilename['Filename'];
1034 downloadsize = int(os.path.getsize(tmpfilename));
1035 fulldatasize = 0;
1036 log.info("Moving file "+tmpfilename+" to "+filepath);
1037 exec_time_start = time.time();
1038 shutil.move(tmpfilename, filepath);
1039 try:
1040 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1041 except AttributeError:
1042 try:
1043 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1044 except ValueError:
1045 pass;
1046 except ValueError:
1047 pass;
1048 exec_time_end = time.time();
1049 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
1050 if(os.path.exists(tmpfilename)):
1051 os.remove(tmpfilename);
1052 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1053 if(outfile=="-"):
1054 pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1055 tmpfilename = pretmpfilename['Filename'];
1056 downloadsize = int(os.path.getsize(tmpfilename));
1057 fulldatasize = 0;
1058 prevdownsize = 0;
1059 exec_time_start = time.time();
1060 with open(tmpfilename, 'rb') as ft:
1061 f = BytesIO();
1062 while True:
1063 databytes = ft.read(buffersize[1]);
1064 if not databytes: break;
1065 datasize = len(databytes);
1066 fulldatasize = datasize + fulldatasize;
1067 percentage = "";
1068 if(downloadsize>0):
1069 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1070 downloaddiff = fulldatasize - prevdownsize;
1071 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1072 prevdownsize = fulldatasize;
1073 f.write(databytes);
1074 f.seek(0);
1075 fdata = f.getvalue();
1076 f.close();
1077 ft.close();
1078 os.remove(tmpfilename);
1079 exec_time_end = time.time();
1080 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
1081 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1082 return returnval;
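# Illustrative usage sketch: write straight to a destination file, or pass
# outfile="-" to get the payload back in memory as 'Content' (placeholder URL
# and paths):
#   result = download_from_url_to_file_with_urllib("http://example.com/file.bin", outfile="file.bin", outpath="/tmp");
#   inmem = download_from_url_to_file_with_urllib("http://example.com/file.bin", outfile="-");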
1084 def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1085 global geturls_download_sleep, havebrotli;
1086 if(sleep<0):
1087 sleep = geturls_download_sleep;
1088 if(timeout<=0):
1089 timeout = 10;
1090 urlparts = urlparse.urlparse(httpurl);
1091 if(isinstance(httpheaders, list)):
1092 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
1093 httpheaders = fix_header_names(httpheaders);
1094 if(httpuseragent is not None):
1095 if('User-Agent' in httpheaders):
1096 httpheaders['User-Agent'] = httpuseragent;
1097 else:
1098 httpheaders.update({'User-Agent': httpuseragent});
1099 if(httpreferer is not None):
1100 if('Referer' in httpheaders):
1101 httpheaders['Referer'] = httpreferer;
1102 else:
1103 httpheaders.update({'Referer': httpreferer});
1104 if(urlparts.username is not None or urlparts.password is not None):
1105 if(sys.version[0]=="2"):
1106 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
1107 if(sys.version[0]>="3"):
1108 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
1109 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
1110 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
1111 geturls_opener.addheaders = httpheaders;
1112 time.sleep(sleep);
1113 if(urlparts[0]=="http"):
1114 httpconn = HTTPConnection(urlparts[1], timeout=timeout);
1115 elif(urlparts[0]=="https"):
1116 httpconn = HTTPSConnection(urlparts[1], timeout=timeout);
1117 else:
1118 return False;
1119 if(postdata is not None and isinstance(postdata, dict)):
1120 postdata = urlencode(postdata);
1121 try:
1122 if(httpmethod=="GET"):
1123 httpconn.request("GET", urlparts[2], headers=httpheaders);
1124 elif(httpmethod=="POST"):
1125 httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
1126 else:
1127 httpconn.request("GET", urlparts[2], headers=httpheaders);
1128 except socket.timeout:
1129 log.info("Error With URL "+httpurl);
1130 return False;
1131 except socket.gaierror:
1132 log.info("Error With URL "+httpurl);
1133 return False;
1134 except BlockingIOError:
1135 log.info("Error With URL "+httpurl);
1136 return False;
1137 geturls_text = httpconn.getresponse();
1138 httpcodeout = geturls_text.status;
1139 httpcodereason = geturls_text.reason;
1140 if(geturls_text.version==10):
1141 httpversionout = "1.0";
1142 else:
1143 httpversionout = "1.1";
1144 httpmethodout = geturls_text._method;
1145 httpurlout = httpurl;
1146 httpheaderout = geturls_text.getheaders();
1147 httpheadersentout = httpheaders;
1148 if(isinstance(httpheaderout, list)):
1149 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
1150 if(sys.version[0]=="2"):
1151 try:
1152 prehttpheaderout = httpheaderout;
1153 httpheaderkeys = httpheaderout.keys();
1154 imax = len(httpheaderkeys);
1155 ic = 0;
1156 httpheaderout = {};
1157 while(ic < imax):
1158 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
1159 ic += 1;
1160 except AttributeError:
1161 pass;
1162 httpheaderout = fix_header_names(httpheaderout);
1163 if(isinstance(httpheadersentout, list)):
1164 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
1165 httpheadersentout = fix_header_names(httpheadersentout);
1166 log.info("Downloading URL "+httpurl);
1167 downloadsize = httpheaderout.get('Content-Length');
1168 if(downloadsize is not None):
1169 downloadsize = int(downloadsize);
1170 if downloadsize is None: downloadsize = 0;
1171 fulldatasize = 0;
1172 prevdownsize = 0;
1173 log.info("Downloading URL "+httpurl);
1174 with BytesIO() as strbuf:
1175 while True:
1176 databytes = geturls_text.read(buffersize);
1177 if not databytes: break;
1178 datasize = len(databytes);
1179 fulldatasize = datasize + fulldatasize;
1180 percentage = "";
1181 if(downloadsize>0):
1182 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1183 downloaddiff = fulldatasize - prevdownsize;
1184 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1185 prevdownsize = fulldatasize;
1186 strbuf.write(databytes);
1187 strbuf.seek(0);
1188 returnval_content = strbuf.read();
1189 if(httpheaderout.get("Content-Encoding")=="gzip"):
1190 try:
1191 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
1192 except zlib.error:
1193 pass;
1194 if(httpheaderout.get("Content-Encoding")=="deflate"):
1195 try:
1196 returnval_content = zlib.decompress(returnval_content);
1197 except zlib.error:
1198 pass;
1199 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
1200 try:
1201 returnval_content = brotli.decompress(returnval_content);
1202 except brotli.error:
1203 pass;
1204 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
1205 geturls_text.close();
1206 return returnval;
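# Illustrative usage sketch for the http.client backend above; on success it
# returns a dict with 'Content', 'Headers', 'Code', and 'Reason', and on
# connection errors it returns False (placeholder URL):
#   ret = download_from_url_with_httplib("http://example.com/");
#   if(ret):
#       print(ret['Code'], ret['Reason'], len(ret['Content']));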
1208 def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1209 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
1210 exec_time_start = time.time();
1211 myhash = hashlib.new("sha1");
1212 if(sys.version[0]=="2"):
1213 myhash.update(httpurl);
1214 myhash.update(str(buffersize));
1215 myhash.update(str(exec_time_start));
1216 if(sys.version[0]>="3"):
1217 myhash.update(httpurl.encode('utf-8'));
1218 myhash.update(str(buffersize).encode('utf-8'));
1219 myhash.update(str(exec_time_start).encode('utf-8'));
1220 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
1221 if(sleep<0):
1222 sleep = geturls_download_sleep;
1223 if(timeout<=0):
1224 timeout = 10;
1225 pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
1226 if(not pretmpfilename):
1227 return False;
1228 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
1229 tmpfilename = f.name;
1230 try:
1231 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1232 except AttributeError:
1233 try:
1234 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1235 except ValueError:
1236 pass;
1237 except ValueError:
1238 pass;
1239 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
1240 f.write(pretmpfilename['Content']);
1241 f.close();
1242 exec_time_end = time.time();
1243 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
1244 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
1245 return returnval;
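# Illustrative usage sketch: same fetch as above but spooled to a named
# temporary file, with the server's Last-Modified header (when parseable)
# applied to the file via os.utime() (placeholder URL):
#   fileinfo = download_from_url_file_with_httplib("http://example.com/file.bin");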
1247 def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1248 global geturls_download_sleep;
1249 if(sleep<0):
1250 sleep = geturls_download_sleep;
1251 if(timeout<=0):
1252 timeout = 10;
1253 if(not outfile=="-"):
1254 outpath = outpath.rstrip(os.path.sep);
1255 filepath = os.path.realpath(outpath+os.path.sep+outfile);
1256 if(not os.path.exists(outpath)):
1257 os.makedirs(outpath);
1258 if(os.path.exists(outpath) and os.path.isfile(outpath)):
1259 return False;
1260 if(os.path.exists(filepath) and os.path.isdir(filepath)):
1261 return False;
1262 pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1263 if(not pretmpfilename):
1264 return False;
1265 tmpfilename = pretmpfilename['Filename'];
1266 downloadsize = int(os.path.getsize(tmpfilename));
1267 fulldatasize = 0;
1268 log.info("Moving file "+tmpfilename+" to "+filepath);
1269 exec_time_start = time.time();
1270 shutil.move(tmpfilename, filepath);
1271 try:
1272 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1273 except AttributeError:
1274 try:
1275 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1276 except ValueError:
1277 pass;
1278 except ValueError:
1279 pass;
1280 exec_time_end = time.time();
1281 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
1282 if(os.path.exists(tmpfilename)):
1283 os.remove(tmpfilename);
1284 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1285 if(outfile=="-"):
1286 pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1287 tmpfilename = pretmpfilename['Filename'];
1288 downloadsize = int(os.path.getsize(tmpfilename));
1289 fulldatasize = 0;
1290 prevdownsize = 0;
1291 exec_time_start = time.time();
1292 with open(tmpfilename, 'rb') as ft:
1293 f = BytesIO();
1294 while True:
1295 databytes = ft.read(buffersize[1]);
1296 if not databytes: break;
1297 datasize = len(databytes);
1298 fulldatasize = datasize + fulldatasize;
1299 percentage = "";
1300 if(downloadsize>0):
1301 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1302 downloaddiff = fulldatasize - prevdownsize;
1303 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1304 prevdownsize = fulldatasize;
1305 f.write(databytes);
1306 f.seek(0);
1307 fdata = f.getvalue();
1308 f.close();
1309 ft.close();
1310 os.remove(tmpfilename);
1311 exec_time_end = time.time();
1312 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
1313 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1314 return returnval;
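# Illustrative usage sketch for the httplib to-file variant; a dict postdata is
# urlencoded before the request is sent (placeholder URL and form fields):
#   result = download_from_url_to_file_with_httplib("http://example.com/form", httpmethod="POST", postdata={'key': "value"}, outfile="reply.txt");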
1316 if(havehttplib2):
1317 def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1318 global geturls_download_sleep, havebrotli;
1319 if(sleep<0):
1320 sleep = geturls_download_sleep;
1321 if(timeout<=0):
1322 timeout = 10;
1323 urlparts = urlparse.urlparse(httpurl);
1324 if(isinstance(httpheaders, list)):
1325 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
1326 httpheaders = fix_header_names(httpheaders);
1327 if(httpuseragent is not None):
1328 if('User-Agent' in httpheaders):
1329 httpheaders['User-Agent'] = httpuseragent;
1330 else:
1331 httpheaders.update({'User-Agent': httpuseragent});
1332 if(httpreferer is not None):
1333 if('Referer' in httpheaders):
1334 httpheaders['Referer'] = httpreferer;
1335 else:
1336 httpheaders.update({'Referer': httpreferer});
1337 if(urlparts.username is not None or urlparts.password is not None):
1338 if(sys.version[0]=="2"):
1339 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
1340 if(sys.version[0]>="3"):
1341 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
1342 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
1343 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
1344 geturls_opener.addheaders = httpheaders;
1345 time.sleep(sleep);
1346 if(urlparts[0]=="http"):
1347 httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout);
1348 elif(urlparts[0]=="https"):
1349 httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout);
1350 else:
1351 return False;
1352 if(postdata is not None and isinstance(postdata, dict)):
1353 postdata = urlencode(postdata);
1354 try:
1355 if(httpmethod=="GET"):
1356 httpconn.request("GET", urlparts[2], headers=httpheaders);
1357 elif(httpmethod=="POST"):
1358 httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
1359 else:
1360 httpconn.request("GET", urlparts[2], headers=httpheaders);
1361 except socket.timeout:
1362 log.info("Error With URL "+httpurl);
1363 return False;
1364 except socket.gaierror:
1365 log.info("Error With URL "+httpurl);
1366 return False;
1367 except BlockingIOError:
1368 log.info("Error With URL "+httpurl);
1369 return False;
1370 geturls_text = httpconn.getresponse();
1371 httpcodeout = geturls_text.status;
1372 httpcodereason = geturls_text.reason;
1373 if(geturls_text.version==10):
1374 httpversionout = "1.0";
1375 else:
1376 httpversionout = "1.1";
1377 httpmethodout = httpmethod;
1378 httpurlout = httpurl;
1379 httpheaderout = geturls_text.getheaders();
1380 httpheadersentout = httpheaders;
1381 if(isinstance(httpheaderout, list)):
1382 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
1383 if(sys.version[0]=="2"):
1384 try:
1385 prehttpheaderout = httpheaderout;
1386 httpheaderkeys = httpheaderout.keys();
1387 imax = len(httpheaderkeys);
1388 ic = 0;
1389 httpheaderout = {};
1390 while(ic < imax):
1391 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
1392 ic += 1;
1393 except AttributeError:
1394 pass;
1395 httpheaderout = fix_header_names(httpheaderout);
1396 if(isinstance(httpheadersentout, list)):
1397 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
1398 httpheadersentout = fix_header_names(httpheadersentout);
1399 log.info("Downloading URL "+httpurl);
1400 downloadsize = httpheaderout.get('Content-Length');
1401 if(downloadsize is not None):
1402 downloadsize = int(downloadsize);
1403 if downloadsize is None: downloadsize = 0;
1404 fulldatasize = 0;
1405 prevdownsize = 0;
1406 log.info("Downloading URL "+httpurl);
1407 with BytesIO() as strbuf:
1408 while True:
1409 databytes = geturls_text.read(buffersize);
1410 if not databytes: break;
1411 datasize = len(databytes);
1412 fulldatasize = datasize + fulldatasize;
1413 percentage = "";
1414 if(downloadsize>0):
1415 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1416 downloaddiff = fulldatasize - prevdownsize;
1417 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1418 prevdownsize = fulldatasize;
1419 strbuf.write(databytes);
1420 strbuf.seek(0);
1421 returnval_content = strbuf.read();
1422 if(httpheaderout.get("Content-Encoding")=="gzip"):
1423 try:
1424 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
1425 except zlib.error:
1426 pass;
1427 if(httpheaderout.get("Content-Encoding")=="deflate"):
1428 try:
1429 returnval_content = zlib.decompress(returnval_content);
1430 except zlib.error:
1431 pass;
1432 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
1433 try:
1434 returnval_content = brotli.decompress(returnval_content);
1435 except brotli.error:
1436 pass;
1437 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
1438 geturls_text.close();
1439 return returnval;
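# Illustrative usage sketch; this variant is only defined when httplib2 is
# importable (havehttplib2 is True), otherwise the fallback below delegates to
# the urllib implementation (placeholder URL):
#   ret = download_from_url_with_httplib2("http://example.com/");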
1441 if(not havehttplib2):
1442 def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1443 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
1444 return returnval;
1446 if(havehttplib2):
1447 def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1448 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
1449 exec_time_start = time.time();
1450 myhash = hashlib.new("sha1");
1451 if(sys.version[0]=="2"):
1452 myhash.update(httpurl);
1453 myhash.update(str(buffersize));
1454 myhash.update(str(exec_time_start));
1455 if(sys.version[0]>="3"):
1456 myhash.update(httpurl.encode('utf-8'));
1457 myhash.update(str(buffersize).encode('utf-8'));
1458 myhash.update(str(exec_time_start).encode('utf-8'));
1459 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
1460 if(sleep<0):
1461 sleep = geturls_download_sleep;
1462 if(timeout<=0):
1463 timeout = 10;
1464 pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
1465 if(not pretmpfilename):
1466 return False;
1467 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
1468 tmpfilename = f.name;
1469 try:
1470 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1471 except AttributeError:
1472 try:
1473 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1474 except ValueError:
1475 pass;
1476 except ValueError:
1477 pass;
1478 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
1479 f.write(pretmpfilename['Content']);
1480 f.close();
1481 exec_time_end = time.time();
1482 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
1483 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
1484 return returnval;
1486 if(not havehttplib2):
1487 def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1488 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
1489 return returnval;
1491 if(havehttplib2):
1492 def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1493 global geturls_download_sleep;
1494 if(sleep<0):
1495 sleep = geturls_download_sleep;
1496 if(timeout<=0):
1497 timeout = 10;
1498 if(not outfile=="-"):
1499 outpath = outpath.rstrip(os.path.sep);
1500 filepath = os.path.realpath(outpath+os.path.sep+outfile);
1501 if(not os.path.exists(outpath)):
1502 os.makedirs(outpath);
1503 if(os.path.exists(outpath) and os.path.isfile(outpath)):
1504 return False;
1505 if(os.path.exists(filepath) and os.path.isdir(filepath)):
1506 return False;
1507 pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1508 if(not pretmpfilename):
1509 return False;
1510 tmpfilename = pretmpfilename['Filename'];
1511 downloadsize = int(os.path.getsize(tmpfilename));
1512 fulldatasize = 0;
1513 log.info("Moving file "+tmpfilename+" to "+filepath);
1514 exec_time_start = time.time();
1515 shutil.move(tmpfilename, filepath);
1516 try:
1517 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1518 except AttributeError:
1519 try:
1520 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1521 except ValueError:
1522 pass;
1523 except ValueError:
1524 pass;
1525 exec_time_end = time.time();
1526 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
1527 if(os.path.exists(tmpfilename)):
1528 os.remove(tmpfilename);
1529 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1530 if(outfile=="-"):
1531 pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1532 tmpfilename = pretmpfilename['Filename'];
1533 downloadsize = int(os.path.getsize(tmpfilename));
1534 fulldatasize = 0;
1535 prevdownsize = 0;
1536 exec_time_start = time.time();
1537 with open(tmpfilename, 'rb') as ft:
1538 f = BytesIO();
1539 while True:
1540 databytes = ft.read(buffersize[1]);
1541 if not databytes: break;
1542 datasize = len(databytes);
1543 fulldatasize = datasize + fulldatasize;
1544 percentage = "";
1545 if(downloadsize>0):
1546 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1547 downloaddiff = fulldatasize - prevdownsize;
1548 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1549 prevdownsize = fulldatasize;
1550 f.write(databytes);
1551 f.seek(0);
1552 fdata = f.getvalue();
1553 f.close();
1554 ft.close();
1555 os.remove(tmpfilename);
1556 exec_time_end = time.time();
1557 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
1558 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1559 return returnval;
1561 if(not havehttplib2):
1562 def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1563 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
1564 return returnval;
1566 def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1567 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
1568 return returnval;
1570 def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1571 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
1572 return returnval;
1574 def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1575 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
1576 return returnval;
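# Illustrative usage sketch: the *_with_request wrappers above are thin aliases
# that delegate to the urllib implementations, kept for naming compatibility
# (placeholder URL):
#   ret = download_from_url_with_request("http://example.com/");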
1578 if(haverequests):
1579 def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1580 global geturls_download_sleep, havebrotli;
1581 if(sleep<0):
1582 sleep = geturls_download_sleep;
1583 if(timeout<=0):
1584 timeout = 10;
1585 urlparts = urlparse.urlparse(httpurl);
1586 if(isinstance(httpheaders, list)):
1587 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
1588 httpheaders = fix_header_names(httpheaders);
1589 if(httpuseragent is not None):
1590 if('User-Agent' in httpheaders):
1591 httpheaders['User-Agent'] = httpuseragent;
1592 else:
1593 httpheaders.update({'User-Agent': httpuseragent});
1594 if(httpreferer is not None):
1595 if('Referer' in httpheaders):
1596 httpheaders['Referer'] = httpreferer;
1597 else:
1598 httpheaders.update({'Referer': httpreferer});
1599 if(urlparts.username is not None or urlparts.password is not None):
1600 if(sys.version[0]=="2"):
1601 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
1602 if(sys.version[0]>="3"):
1603 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
1604 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
1605 time.sleep(sleep);
1606 if(postdata is not None and not isinstance(postdata, dict)):
1607 postdata = urlencode(postdata);
1608 try:
1609 reqsession = requests.Session();
1610 if(httpmethod=="GET"):
1611 geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie, stream=True);
1612 elif(httpmethod=="POST"):
1613 geturls_text = reqsession.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
1614 else:
1615 geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie, stream=True);
1616 except requests.exceptions.ConnectTimeout:
1617 log.info("Error With URL "+httpurl);
1618 return False;
1619 except requests.exceptions.ConnectionError:
1620 log.info("Error With URL "+httpurl);
1621 return False;
1622 except socket.timeout:
1623 log.info("Error With URL "+httpurl);
1624 return False;
1625 httpcodeout = geturls_text.status_code;
1626 httpcodereason = geturls_text.reason;
1627 if(geturls_text.raw.version==10):
1628 httpversionout = "1.0";
1629 else:
1630 httpversionout = "1.1";
1631 httpmethodout = httpmethod;
1632 httpurlout = geturls_text.url;
1633 httpheaderout = geturls_text.headers;
1634 httpheadersentout = geturls_text.request.headers;
1635 if(isinstance(httpheaderout, list)):
1636 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
1637 if(sys.version[0]=="2"):
1638 try:
1639 prehttpheaderout = httpheaderout;
1640 httpheaderkeys = httpheaderout.keys();
1641 imax = len(httpheaderkeys);
1642 ic = 0;
1643 httpheaderout = {};
1644 while(ic < imax):
1645 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
1646 ic += 1;
1647 except AttributeError:
1648 pass;
1649 httpheaderout = fix_header_names(httpheaderout);
1650 if(isinstance(httpheadersentout, list)):
1651 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
1652 httpheadersentout = fix_header_names(httpheadersentout);
1653 log.info("Downloading URL "+httpurl);
1654 downloadsize = httpheaderout.get('Content-Length');
1655 if(downloadsize is not None):
1656 downloadsize = int(downloadsize);
1657 if downloadsize is None: downloadsize = 0;
1658 fulldatasize = 0;
1659 prevdownsize = 0;
1660 log.info("Downloading URL "+httpurl);
1661 with BytesIO() as strbuf:
1662 while True:
1663 databytes = geturls_text.raw.read(buffersize);
1664 if not databytes: break;
1665 datasize = len(databytes);
1666 fulldatasize = datasize + fulldatasize;
1667 percentage = "";
1668 if(downloadsize>0):
1669 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1670 downloaddiff = fulldatasize - prevdownsize;
1671 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1672 prevdownsize = fulldatasize;
1673 strbuf.write(databytes);
1674 strbuf.seek(0);
1675 returnval_content = strbuf.read();
1676 if(httpheaderout.get("Content-Encoding")=="gzip"):
1677 try:
1678 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
1679 except zlib.error:
1680 pass;
1681 if(httpheaderout.get("Content-Encoding")=="deflate"):
1682 try:
1683 returnval_content = zlib.decompress(returnval_content);
1684 except zlib.error:
1685 pass;
1686 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
1687 try:
1688 returnval_content = brotli.decompress(returnval_content);
1689 except brotli.error:
1690 pass;
1691 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
1692 geturls_text.close();
1693 return returnval;
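# Illustrative usage sketch for the requests backend above; the response body is
# streamed from geturls_text.raw in buffersize chunks, so large downloads are
# not read in one piece (placeholder URL):
#   ret = download_from_url_with_requests("http://example.com/", buffersize=524288, sleep=0, timeout=10);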
1695 if(not haverequests):
1696 def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1697 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
1698 return returnval;
1700 if(haverequests):
1701 def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1702 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
1703 exec_time_start = time.time();
1704 myhash = hashlib.new("sha1");
1705 if(sys.version[0]=="2"):
1706 myhash.update(httpurl);
1707 myhash.update(str(buffersize));
1708 myhash.update(str(exec_time_start));
1709 if(sys.version[0]>="3"):
1710 myhash.update(httpurl.encode('utf-8'));
1711 myhash.update(str(buffersize).encode('utf-8'));
1712 myhash.update(str(exec_time_start).encode('utf-8'));
1713 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
1714 if(sleep<0):
1715 sleep = geturls_download_sleep;
1716 if(timeout<=0):
1717 timeout = 10;
1718 pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
1719 if(not pretmpfilename):
1720 return False;
1721 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
1722 tmpfilename = f.name;
1723 try:
1724 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1725 except AttributeError:
1726 try:
1727 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1728 except ValueError:
1729 pass;
1730 except ValueError:
1731 pass;
1732 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
1733 f.write(pretmpfilename['Content']);
1734 f.close();
1735 exec_time_end = time.time();
1736 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
1737 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
1738 return returnval;
1740 if(not haverequests):
1741 def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1742 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
1743 return returnval;
1745 if(haverequests):
1746 def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1747 global geturls_download_sleep;
1748 if(sleep<0):
1749 sleep = geturls_download_sleep;
1750 if(timeout<=0):
1751 timeout = 10;
1752 if(not outfile=="-"):
1753 outpath = outpath.rstrip(os.path.sep);
1754 filepath = os.path.realpath(outpath+os.path.sep+outfile);
1755 if(not os.path.exists(outpath)):
1756 os.makedirs(outpath);
1757 if(os.path.exists(outpath) and os.path.isfile(outpath)):
1758 return False;
1759 if(os.path.exists(filepath) and os.path.isdir(filepath)):
1760 return False;
1761 pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1762 if(not pretmpfilename):
1763 return False;
1764 tmpfilename = pretmpfilename['Filename'];
1765 downloadsize = int(os.path.getsize(tmpfilename));
1766 fulldatasize = 0;
1767 log.info("Moving file "+tmpfilename+" to "+filepath);
1768 exec_time_start = time.time();
1769 shutil.move(tmpfilename, filepath);
1770 try:
1771 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1772 except AttributeError:
1773 try:
1774 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1775 except ValueError:
1776 pass;
1777 except ValueError:
1778 pass;
1779 exec_time_end = time.time();
1780 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
1781 if(os.path.exists(tmpfilename)):
1782 os.remove(tmpfilename);
1783 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1784 if(outfile=="-"):
1785 pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1786 tmpfilename = pretmpfilename['Filename'];
1787 downloadsize = int(os.path.getsize(tmpfilename));
1788 fulldatasize = 0;
1789 prevdownsize = 0;
1790 exec_time_start = time.time();
1791 with open(tmpfilename, 'rb') as ft:
1792 f = BytesIO();
1793 while True:
1794 databytes = ft.read(buffersize[1]);
1795 if not databytes: break;
1796 datasize = len(databytes);
1797 fulldatasize = datasize + fulldatasize;
1798 percentage = "";
1799 if(downloadsize>0):
1800 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1801 downloaddiff = fulldatasize - prevdownsize;
1802 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1803 prevdownsize = fulldatasize;
1804 f.write(databytes);
1805 f.seek(0);
1806 fdata = f.getvalue();
1807 f.close();
1808 ft.close();
1809 os.remove(tmpfilename);
1810 exec_time_end = time.time();
1811 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
1812 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
1813 return returnval;
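# Illustrative usage sketch for the requests to-file variant; outfile="-" reads
# the temporary file back into memory and returns it as 'Content' (placeholder
# URL and filename):
#   result = download_from_url_to_file_with_requests("http://example.com/file.bin", outfile="file.bin");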
1815 if(not haverequests):
1816 def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1817 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
1818 return returnval;
1820 if(havehttpx):
1821 def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1822 global geturls_download_sleep, havebrotli;
1823 if(sleep<0):
1824 sleep = geturls_download_sleep;
1825 if(timeout<=0):
1826 timeout = 10;
1827 urlparts = urlparse.urlparse(httpurl);
1828 if(isinstance(httpheaders, list)):
1829 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
1830 httpheaders = fix_header_names(httpheaders);
1831 if(httpuseragent is not None):
1832 if('User-Agent' in httpheaders):
1833 httpheaders['User-Agent'] = httpuseragent;
1834 else:
1835 httpheaders.update({'User-Agent': httpuseragent});
1836 if(httpreferer is not None):
1837 if('Referer' in httpheaders):
1838 httpheaders['Referer'] = httpreferer;
1839 else:
1840 httpheaders.update({'Referer': httpreferer});
1841 if(urlparts.username is not None or urlparts.password is not None):
1842 if(sys.version[0]=="2"):
1843 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
1844 if(sys.version[0]>="3"):
1845 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
1846 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
1847 time.sleep(sleep);
1848 if(postdata is not None and not isinstance(postdata, dict)):
1849 postdata = urlencode(postdata);
1850 try:
1851 if(httpmethod=="GET"):
1852 httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
1853 geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
1854 elif(httpmethod=="POST"):
1855 httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
1856 geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
1857 else:
1858 httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
1859 geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
1860 except httpx.ConnectTimeout:
1861 log.info("Error With URL "+httpurl);
1862 return False;
1863 except httpx.ConnectError:
1864 log.info("Error With URL "+httpurl);
1865 return False;
1866 except socket.timeout:
1867 log.info("Error With URL "+httpurl);
1868 return False;
1869 httpcodeout = geturls_text.status_code;
1870 httpcodereason = geturls_text.reason_phrase;
1871 httpversionout = geturls_text.http_version;
1872 httpmethodout = httpmethod;
1873 httpurlout = str(geturls_text.url);
1874 httpheaderout = geturls_text.headers;
1875 httpheadersentout = geturls_text.request.headers;
1876 if(isinstance(httpheaderout, list)):
1877 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
1878 if(sys.version[0]=="2"):
1879 try:
1880 prehttpheaderout = httpheaderout;
1881 httpheaderkeys = httpheaderout.keys();
1882 imax = len(httpheaderkeys);
1883 ic = 0;
1884 httpheaderout = {};
1885 while(ic < imax):
1886 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
1887 ic += 1;
1888 except AttributeError:
1889 pass;
1890 httpheaderout = fix_header_names(httpheaderout);
1891 if(isinstance(httpheadersentout, list)):
1892 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
1893 httpheadersentout = fix_header_names(httpheadersentout);
1894 log.info("Downloading URL "+httpurl);
1895 downloadsize = httpheaderout.get('Content-Length');
1896 if(downloadsize is not None):
1897 downloadsize = int(downloadsize);
1898 if downloadsize is None: downloadsize = 0;
1899 fulldatasize = 0;
1900 prevdownsize = 0;
1901 log.info("Downloading URL "+httpurl);
1902 with BytesIO() as strbuf:
1903 while True:
1904 databytes = geturls_text.read(buffersize);
1905 if not databytes: break;
1906 datasize = len(databytes);
1907 fulldatasize = datasize + fulldatasize;
1908 percentage = "";
1909 if(downloadsize>0):
1910 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1911 downloaddiff = fulldatasize - prevdownsize;
1912 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1913 prevdownsize = fulldatasize;
1914 strbuf.write(databytes);
1915 strbuf.seek(0);
1916 returnval_content = strbuf.read();
1917 if(httpheaderout.get("Content-Encoding")=="gzip"):
1918 try:
1919 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
1920 except zlib.error:
1921 pass;
1922 if(httpheaderout.get("Content-Encoding")=="deflate"):
1923 try:
1924 returnval_content = zlib.decompress(returnval_content);
1925 except zlib.error:
1926 pass;
1927 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
1928 try:
1929 returnval_content = brotli.decompress(returnval_content);
1930 except brotli.error:
1931 pass;
1932 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
1933 geturls_text.close();
1934 return returnval;
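# Illustrative usage sketch for the httpx backend above; httpx.Client is built
# with http1=True and http2=False, so responses report an HTTP/1.x version
# (placeholder URL):
#   ret = download_from_url_with_httpx("http://example.com/");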
1936 if(not havehttpx):
1937 def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
1938 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
1939 return returnval;
1941 if(havehttpx):
1942 def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1943 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
1944 exec_time_start = time.time();
1945 myhash = hashlib.new("sha1");
1946 if(sys.version[0]=="2"):
1947 myhash.update(httpurl);
1948 myhash.update(str(buffersize));
1949 myhash.update(str(exec_time_start));
1950 if(sys.version[0]>="3"):
1951 myhash.update(httpurl.encode('utf-8'));
1952 myhash.update(str(buffersize).encode('utf-8'));
1953 myhash.update(str(exec_time_start).encode('utf-8'));
1954 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
1955 if(sleep<0):
1956 sleep = geturls_download_sleep;
1957 if(timeout<=0):
1958 timeout = 10;
1959 pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
1960 if(not pretmpfilename):
1961 return False;
1962 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
1963 tmpfilename = f.name;
1964 try:
1965 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1966 except AttributeError:
1967 try:
1968 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1969 except ValueError:
1970 pass;
1971 except ValueError:
1972 pass;
1973 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
1974 f.write(pretmpfilename['Content']);
1975 f.close();
1976 exec_time_end = time.time();
1977 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
1978 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
1979 return returnval;
1981 if(not havehttpx):
1982 def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
1983 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
1984 return returnval;
1986 if(havehttpx):
1987 def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1988 global geturls_download_sleep;
1989 if(sleep<0):
1990 sleep = geturls_download_sleep;
1991 if(timeout<=0):
1992 timeout = 10;
1993 if(not outfile=="-"):
1994 outpath = outpath.rstrip(os.path.sep);
1995 filepath = os.path.realpath(outpath+os.path.sep+outfile);
1996 if(not os.path.exists(outpath)):
1997 os.makedirs(outpath);
1998 if(os.path.exists(outpath) and os.path.isfile(outpath)):
1999 return False;
2000 if(os.path.exists(filepath) and os.path.isdir(filepath)):
2001 return False;
2002 pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2003 if(not pretmpfilename):
2004 return False;
2005 tmpfilename = pretmpfilename['Filename'];
2006 downloadsize = int(os.path.getsize(tmpfilename));
2007 fulldatasize = 0;
2008 log.info("Moving file "+tmpfilename+" to "+filepath);
2009 exec_time_start = time.time();
2010 shutil.move(tmpfilename, filepath);
2011 try:
2012 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2013 except AttributeError:
2014 try:
2015 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2016 except ValueError:
2017 pass;
2018 except ValueError:
2019 pass;
2020 exec_time_end = time.time();
2021 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
2022 if(os.path.exists(tmpfilename)):
2023 os.remove(tmpfilename);
2024 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2025 if(outfile=="-"):
2026 pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2027 tmpfilename = pretmpfilename['Filename'];
2028 downloadsize = int(os.path.getsize(tmpfilename));
2029 fulldatasize = 0;
2030 prevdownsize = 0;
2031 exec_time_start = time.time();
2032 with open(tmpfilename, 'rb') as ft:
2033 f = BytesIO();
2034 while True:
2035 databytes = ft.read(buffersize[1]);
2036 if not databytes: break;
2037 datasize = len(databytes);
2038 fulldatasize = datasize + fulldatasize;
2039 percentage = "";
2040 if(downloadsize>0):
2041 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2042 downloaddiff = fulldatasize - prevdownsize;
2043 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2044 prevdownsize = fulldatasize;
2045 f.write(databytes);
2046 f.seek(0);
2047 fdata = f.getvalue();
2048 f.close();
2049 ft.close();
2050 os.remove(tmpfilename);
2051 exec_time_end = time.time();
2052 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
2053 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2054 return returnval;
2056 if(not havehttpx):
2057 def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2058 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
2059 return returnval;
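# Usage sketch for the httpx-backed helpers above (the URL and paths are
# placeholders, not part of this module). If httpx is missing, the fallback
# definitions route these same names through the urllib implementations, so
# the calls below behave identically either way.
#
# fetched = download_from_url_with_httpx("https://example.com/");
# if(fetched):
#     print(fetched.get('Code'), fetched.get('Reason'), fetched.get('Contentsize'));
# saved = download_from_url_to_file_with_httpx("https://example.com/robots.txt", outfile="robots.txt", outpath="/tmp");
# if(saved):
#     print(saved.get('Filename'), saved.get('Filesize'));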
2061 if(havehttpx):
2062 def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2063 global geturls_download_sleep, havebrotli;
2064 if(sleep<0):
2065 sleep = geturls_download_sleep;
2066 if(timeout<=0):
2067 timeout = 10;
2068 urlparts = urlparse.urlparse(httpurl);
2069 if(isinstance(httpheaders, list)):
2070 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
2071 httpheaders = fix_header_names(httpheaders);
2072 if(httpuseragent is not None):
2073 if('User-Agent' in httpheaders):
2074 httpheaders['User-Agent'] = httpuseragent;
2075 else:
2076 httpheaders.update({'User-Agent': httpuseragent});
2077 if(httpreferer is not None):
2078 if('Referer' in httpheaders):
2079 httpheaders['Referer'] = httpreferer;
2080 else:
2081 httpheaders.update({'Referer': httpreferer});
2082 if(urlparts.username is not None or urlparts.password is not None):
2083 if(sys.version[0]=="2"):
2084 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
2085 if(sys.version[0]>="3"):
2086 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
2087 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
2088 time.sleep(sleep);
2089 if(postdata is not None and not isinstance(postdata, dict)):
2090 postdata = urlencode(postdata);
2091 try:
2092 if(httpmethod=="GET"):
2093 httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
2094 geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
2095 elif(httpmethod=="POST"):
2096 httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
2097 geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
2098 else:
2099 httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
2100 geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
2101 except httpx.ConnectTimeout:
2102 log.info("Error With URL "+httpurl);
2103 return False;
2104 except httpx.ConnectError:
2105 log.info("Error With URL "+httpurl);
2106 return False;
2107 except socket.timeout:
2108 log.info("Error With URL "+httpurl);
2109 return False;
2110 httpcodeout = geturls_text.status_code;
2111 httpcodereason = geturls_text.reason_phrase;
2112 httpversionout = geturls_text.http_version;
2113 httpmethodout = httpmethod;
2114 httpurlout = str(geturls_text.url);
2115 httpheaderout = geturls_text.headers;
2116 httpheadersentout = geturls_text.request.headers;
2117 if(isinstance(httpheaderout, list)):
2118 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
2119 if(sys.version[0]=="2"):
2120 try:
2121 prehttpheaderout = httpheaderout;
2122 httpheaderkeys = httpheaderout.keys();
2123 imax = len(httpheaderkeys);
2124 ic = 0;
2125 httpheaderout = {};
2126 while(ic < imax):
2127 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
2128 ic += 1;
2129 except AttributeError:
2130 pass;
2131 httpheaderout = fix_header_names(httpheaderout);
2132 if(isinstance(httpheadersentout, list)):
2133 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
2134 httpheadersentout = fix_header_names(httpheadersentout);
2135 log.info("Downloading URL "+httpurl);
2136 downloadsize = httpheaderout.get('Content-Length');
2137 if(downloadsize is not None):
2138 downloadsize = int(downloadsize);
2139 if downloadsize is None: downloadsize = 0;
2140 fulldatasize = 0;
2141 prevdownsize = 0;
2142 log.info("Downloading URL "+httpurl);
2143 with BytesIO() as strbuf:
2144 for databytes in geturls_text.iter_bytes(chunk_size=buffersize):  # httpx's Response.read() takes no size argument; iter_bytes() re-chunks the already-loaded body
2147 datasize = len(databytes);
2148 fulldatasize = datasize + fulldatasize;
2149 percentage = "";
2150 if(downloadsize>0):
2151 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2152 downloaddiff = fulldatasize - prevdownsize;
2153 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2154 prevdownsize = fulldatasize;
2155 strbuf.write(databytes);
2156 strbuf.seek(0);
2157 returnval_content = strbuf.read();
2158 if(httpheaderout.get("Content-Encoding")=="gzip"):
2159 try:
2160 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
2161 except zlib.error:
2162 pass;
2163 if(httpheaderout.get("Content-Encoding")=="deflate"):
2164 try:
2165 returnval_content = zlib.decompress(returnval_content);
2166 except zlib.error:
2167 pass;
2168 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
2169 try:
2170 returnval_content = brotli.decompress(returnval_content);
2171 except brotli.error:
2172 pass;
2173 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
2174 geturls_text.close();
2175 return returnval;
2177 if(not havehttpx):
2178 def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2179 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
2180 return returnval;
2182 if(havehttpx):
2183 def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2184 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
2185 exec_time_start = time.time();
2186 myhash = hashlib.new("sha1");
2187 if(sys.version[0]=="2"):
2188 myhash.update(httpurl);
2189 myhash.update(str(buffersize));
2190 myhash.update(str(exec_time_start));
2191 if(sys.version[0]>="3"):
2192 myhash.update(httpurl.encode('utf-8'));
2193 myhash.update(str(buffersize).encode('utf-8'));
2194 myhash.update(str(exec_time_start).encode('utf-8'));
2195 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
2196 if(sleep<0):
2197 sleep = geturls_download_sleep;
2198 if(timeout<=0):
2199 timeout = 10;
2200 pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
2201 if(not pretmpfilename):
2202 return False;
2203 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
2204 tmpfilename = f.name;
2205 f.write(pretmpfilename['Content']);  # write the payload first; the utime calls below then stamp Last-Modified onto the finished file
2206 f.close();
2207 try:
2208 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2209 except AttributeError:
2210 try:
2211 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2212 except ValueError:
2213 pass;
2214 except ValueError:
2215 pass;
2216 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
2217 exec_time_end = time.time();
2218 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
2219 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
2220 return returnval;
2222 if(not havehttpx):
2223 def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2224 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
2225 return returnval;
2227 if(havehttpx):
2228 def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2229 global geturls_download_sleep;
2230 if(sleep<0):
2231 sleep = geturls_download_sleep;
2232 if(timeout<=0):
2233 timeout = 10;
2234 if(not outfile=="-"):
2235 outpath = outpath.rstrip(os.path.sep);
2236 filepath = os.path.realpath(outpath+os.path.sep+outfile);
2237 if(not os.path.exists(outpath)):
2238 os.makedirs(outpath);
2239 if(os.path.exists(outpath) and os.path.isfile(outpath)):
2240 return False;
2241 if(os.path.exists(filepath) and os.path.isdir(filepath)):
2242 return False;
2243 pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2244 if(not pretmpfilename):
2245 return False;
2246 tmpfilename = pretmpfilename['Filename'];
2247 downloadsize = int(os.path.getsize(tmpfilename));
2248 fulldatasize = 0;
2249 log.info("Moving file "+tmpfilename+" to "+filepath);
2250 exec_time_start = time.time();
2251 shutil.move(tmpfilename, filepath);
2252 try:
2253 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2254 except AttributeError:
2255 try:
2256 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2257 except ValueError:
2258 pass;
2259 except ValueError:
2260 pass;
2261 exec_time_end = time.time();
2262 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
2263 if(os.path.exists(tmpfilename)):
2264 os.remove(tmpfilename);
2265 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2266 if(outfile=="-"):
2267 pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2268 tmpfilename = pretmpfilename['Filename'];
2269 downloadsize = int(os.path.getsize(tmpfilename));
2270 fulldatasize = 0;
2271 prevdownsize = 0;
2272 exec_time_start = time.time();
2273 with open(tmpfilename, 'rb') as ft:
2274 f = BytesIO();
2275 while True:
2276 databytes = ft.read(buffersize[1]);
2277 if not databytes: break;
2278 datasize = len(databytes);
2279 fulldatasize = datasize + fulldatasize;
2280 percentage = "";
2281 if(downloadsize>0):
2282 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2283 downloaddiff = fulldatasize - prevdownsize;
2284 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2285 prevdownsize = fulldatasize;
2286 f.write(databytes);
2287 f.seek(0);
2288 fdata = f.getvalue();
2289 f.close();
2290 ft.close();
2291 os.remove(tmpfilename);
2292 exec_time_end = time.time();
2293 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
2294 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2295 return returnval;
2297 if(not havehttpx):
2298 def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2299 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
2300 return returnval;
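# Note on return values, which applies to every download_from_url_* helper in
# this module: failures return False; successes return a dict whose 'Type' is
# either "Content" (payload bytes under 'Content', byte count under
# 'Contentsize') or "File" (path under 'Filename', size under 'Filesize'),
# plus 'Headers', 'Version', 'Method', 'HeadersSent', 'URL', 'Code' and
# 'Reason'. The file-writing helpers also add 'DownloadTime' and
# 'DownloadTimeReadable', and the *_to_file variants add 'MoveFileTime' and
# 'MoveFileTimeReadable'. A minimal consumer sketch (placeholder URL):
#
# result = download_from_url_with_httpx2("https://example.com/");
# if(result and result.get('Code')==200):
#     payload = result.get('Content');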
2302 if(havehttpcore):
2303 def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2304 global geturls_download_sleep, havebrotli;
2305 if(sleep<0):
2306 sleep = geturls_download_sleep;
2307 if(timeout<=0):
2308 timeout = 10;
2309 urlparts = urlparse.urlparse(httpurl);
2310 if(isinstance(httpheaders, list)):
2311 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
2312 httpheaders = fix_header_names(httpheaders);
2313 if(httpuseragent is not None):
2314 if('User-Agent' in httpheaders):
2315 httpheaders['User-Agent'] = httpuseragent;
2316 else:
2317 httpheaders.update({'User-Agent': httpuseragent});
2318 if(httpreferer is not None):
2319 if('Referer' in httpheaders):
2320 httpheaders['Referer'] = httpreferer;
2321 else:
2322 httpheaders.update({'Referer': httpreferer});
2323 if(urlparts.username is not None or urlparts.password is not None):
2324 if(sys.version[0]=="2"):
2325 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
2326 if(sys.version[0]>="3"):
2327 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
2328 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
2329 time.sleep(sleep);
2330 if(postdata is not None and not isinstance(postdata, dict)):
2331 postdata = urlencode(postdata);
2332 try:
2333 if(httpmethod=="GET"):
2334 httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
2335 geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
2336 elif(httpmethod=="POST"):
2337 httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
2338 geturls_text = httpx_pool.request("GET", httpurl, data=postdata, headers=httpheaders);
2339 else:
2340 httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
2341 geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
2342 except httpcore.ConnectTimeout:
2343 log.info("Error With URL "+httpurl);
2344 return False;
2345 except httpcore.ConnectError:
2346 log.info("Error With URL "+httpurl);
2347 return False;
2348 except socket.timeout:
2349 log.info("Error With URL "+httpurl);
2350 return False;
2351 httpcodeout = geturls_text.status;
2352 httpcodereason = http_status_to_reason(geturls_text.status);
2353 httpversionout = "1.1";
2354 httpmethodout = httpmethod;
2355 httpurlout = str(httpurl);
2356 httpheaderout = geturls_text.headers;
2357 httpheadersentout = httpheaders;
2358 if(isinstance(httpheaderout, list)):
2359 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
2360 if(sys.version[0]=="2"):
2361 try:
2362 prehttpheaderout = httpheaderout;
2363 httpheaderkeys = httpheaderout.keys();
2364 imax = len(httpheaderkeys);
2365 ic = 0;
2366 httpheaderout = {};
2367 while(ic < imax):
2368 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
2369 ic += 1;
2370 except AttributeError:
2371 pass;
2372 httpheaderout = fix_header_names(httpheaderout);
2373 if(isinstance(httpheadersentout, list)):
2374 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
2375 httpheadersentout = fix_header_names(httpheadersentout);
2376 log.info("Downloading URL "+httpurl);
2377 downloadsize = httpheaderout.get('Content-Length');
2378 if(downloadsize is not None):
2379 downloadsize = int(downloadsize);
2380 if downloadsize is None: downloadsize = 0;
2381 fulldatasize = 0;
2382 prevdownsize = 0;
2383 log.info("Downloading URL "+httpurl);
2384 with BytesIO() as strbuf:
2385 respbuf = BytesIO(geturls_text.content);  # httpcore preloads the response body; Response.read() takes no size argument, so re-chunk it here
2386 for databytes in iter(lambda: respbuf.read(buffersize), b''):
2388 datasize = len(databytes);
2389 fulldatasize = datasize + fulldatasize;
2390 percentage = "";
2391 if(downloadsize>0):
2392 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2393 downloaddiff = fulldatasize - prevdownsize;
2394 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2395 prevdownsize = fulldatasize;
2396 strbuf.write(databytes);
2397 strbuf.seek(0);
2398 returnval_content = strbuf.read();
2399 if(httpheaderout.get("Content-Encoding")=="gzip"):
2400 try:
2401 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
2402 except zlib.error:
2403 pass;
2404 if(httpheaderout.get("Content-Encoding")=="deflate"):
2405 try:
2406 returnval_content = zlib.decompress(returnval_content);
2407 except zlib.error:
2408 pass;
2409 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
2410 try:
2411 returnval_content = brotli.decompress(returnval_content);
2412 except brotli.error:
2413 pass;
2414 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
2415 geturls_text.close();
2416 return returnval;
2418 if(not havehttpcore):
2419 def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2420 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
2421 return returnval;
2423 if(havehttpcore):
2424 def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2425 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
2426 exec_time_start = time.time();
2427 myhash = hashlib.new("sha1");
2428 if(sys.version[0]=="2"):
2429 myhash.update(httpurl);
2430 myhash.update(str(buffersize));
2431 myhash.update(str(exec_time_start));
2432 if(sys.version[0]>="3"):
2433 myhash.update(httpurl.encode('utf-8'));
2434 myhash.update(str(buffersize).encode('utf-8'));
2435 myhash.update(str(exec_time_start).encode('utf-8'));
2436 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
2437 if(sleep<0):
2438 sleep = geturls_download_sleep;
2439 if(timeout<=0):
2440 timeout = 10;
2441 pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
2442 if(not pretmpfilename):
2443 return False;
2444 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
2445 tmpfilename = f.name;
2446 f.write(pretmpfilename['Content']);  # write the payload first; the utime calls below then stamp Last-Modified onto the finished file
2447 f.close();
2448 try:
2449 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2450 except AttributeError:
2451 try:
2452 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2453 except ValueError:
2454 pass;
2455 except ValueError:
2456 pass;
2457 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
2458 exec_time_end = time.time();
2459 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
2460 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
2461 return returnval;
2463 if(not havehttpcore):
2464 def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2465 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
2466 return returnval;
2468 if(havehttpcore):
2469 def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2470 global geturls_download_sleep;
2471 if(sleep<0):
2472 sleep = geturls_download_sleep;
2473 if(timeout<=0):
2474 timeout = 10;
2475 if(not outfile=="-"):
2476 outpath = outpath.rstrip(os.path.sep);
2477 filepath = os.path.realpath(outpath+os.path.sep+outfile);
2478 if(not os.path.exists(outpath)):
2479 os.makedirs(outpath);
2480 if(os.path.exists(outpath) and os.path.isfile(outpath)):
2481 return False;
2482 if(os.path.exists(filepath) and os.path.isdir(filepath)):
2483 return False;
2484 pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2485 if(not pretmpfilename):
2486 return False;
2487 tmpfilename = pretmpfilename['Filename'];
2488 downloadsize = int(os.path.getsize(tmpfilename));
2489 fulldatasize = 0;
2490 log.info("Moving file "+tmpfilename+" to "+filepath);
2491 exec_time_start = time.time();
2492 shutil.move(tmpfilename, filepath);
2493 try:
2494 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2495 except AttributeError:
2496 try:
2497 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2498 except ValueError:
2499 pass;
2500 except ValueError:
2501 pass;
2502 exec_time_end = time.time();
2503 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
2504 if(os.path.exists(tmpfilename)):
2505 os.remove(tmpfilename);
2506 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2507 if(outfile=="-"):
2508 pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2509 tmpfilename = pretmpfilename['Filename'];
2510 downloadsize = int(os.path.getsize(tmpfilename));
2511 fulldatasize = 0;
2512 prevdownsize = 0;
2513 exec_time_start = time.time();
2514 with open(tmpfilename, 'rb') as ft:
2515 f = BytesIO();
2516 while True:
2517 databytes = ft.read(buffersize[1]);
2518 if not databytes: break;
2519 datasize = len(databytes);
2520 fulldatasize = datasize + fulldatasize;
2521 percentage = "";
2522 if(downloadsize>0):
2523 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2524 downloaddiff = fulldatasize - prevdownsize;
2525 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2526 prevdownsize = fulldatasize;
2527 f.write(databytes);
2528 f.seek(0);
2529 fdata = f.getvalue();
2530 f.close();
2531 ft.close();
2532 os.remove(tmpfilename);
2533 exec_time_end = time.time();
2534 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
2535 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2536 return returnval;
2538 if(not havehttpcore):
2539 def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2540 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
2541 return returnval;
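# The httpcore helpers above drive httpcore.ConnectionPool directly with
# http2=False, so they only speak HTTP/1.1; note also that, as written, the
# httpcookie and timeout arguments are accepted but never passed on to the
# pool's request() call. Usage sketch (placeholder URL):
#
# result = download_from_url_with_httpcore("https://example.com/");
# if(result):
#     print(result.get('Version'), result.get('Code'), result.get('Contentsize'));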
2543 if(havehttpcore):
2544 def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2545 global geturls_download_sleep, havebrotli;
2546 if(sleep<0):
2547 sleep = geturls_download_sleep;
2548 if(timeout<=0):
2549 timeout = 10;
2550 urlparts = urlparse.urlparse(httpurl);
2551 if(isinstance(httpheaders, list)):
2552 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
2553 httpheaders = fix_header_names(httpheaders);
2554 if(httpuseragent is not None):
2555 if('User-Agent' in httpheaders):
2556 httpheaders['User-Agent'] = httpuseragent;
2557 else:
2558 httpheaders.update({'User-Agent': httpuseragent});
2559 if(httpreferer is not None):
2560 if('Referer' in httpheaders):
2561 httpheaders['Referer'] = httpreferer;
2562 else:
2563 httpheaders.update({'Referer': httpreferer});
2564 if(urlparts.username is not None or urlparts.password is not None):
2565 if(sys.version[0]=="2"):
2566 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
2567 if(sys.version[0]>="3"):
2568 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
2569 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
2570 time.sleep(sleep);
2571 if(postdata is not None and not isinstance(postdata, dict)):
2572 postdata = urlencode(postdata);
2573 try:
2574 if(httpmethod=="GET"):
2575 httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
2576 geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
2577 elif(httpmethod=="POST"):
2578 httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
2579 geturls_text = httpx_pool.request("GET", httpurl, data=postdata, headers=httpheaders);
2580 else:
2581 httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
2582 geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
2583 except httpcore.ConnectTimeout:
2584 log.info("Error With URL "+httpurl);
2585 return False;
2586 except httpcore.ConnectError:
2587 log.info("Error With URL "+httpurl);
2588 return False;
2589 except socket.timeout:
2590 log.info("Error With URL "+httpurl);
2591 return False;
2592 httpcodeout = geturls_text.status;
2593 httpcodereason = http_status_to_reason(geturls_text.status);
2594 httpversionout = "1.1";
2595 httpmethodout = httpmethod;
2596 httpurlout = str(httpurl);
2597 httpheaderout = geturls_text.headers;
2598 httpheadersentout = httpheaders;
2599 if(isinstance(httpheaderout, list)):
2600 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
2601 if(sys.version[0]=="2"):
2602 try:
2603 prehttpheaderout = httpheaderout;
2604 httpheaderkeys = httpheaderout.keys();
2605 imax = len(httpheaderkeys);
2606 ic = 0;
2607 httpheaderout = {};
2608 while(ic < imax):
2609 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
2610 ic += 1;
2611 except AttributeError:
2612 pass;
2613 httpheaderout = fix_header_names(httpheaderout);
2614 if(isinstance(httpheadersentout, list)):
2615 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
2616 httpheadersentout = fix_header_names(httpheadersentout);
2617 log.info("Downloading URL "+httpurl);
2618 downloadsize = httpheaderout.get('Content-Length');
2619 if(downloadsize is not None):
2620 downloadsize = int(downloadsize);
2621 if downloadsize is None: downloadsize = 0;
2622 fulldatasize = 0;
2623 prevdownsize = 0;
2624 log.info("Downloading URL "+httpurl);
2625 with BytesIO() as strbuf:
2626 respbuf = BytesIO(geturls_text.content);  # httpcore preloads the response body; Response.read() takes no size argument, so re-chunk it here
2627 for databytes in iter(lambda: respbuf.read(buffersize), b''):
2629 datasize = len(databytes);
2630 fulldatasize = datasize + fulldatasize;
2631 percentage = "";
2632 if(downloadsize>0):
2633 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2634 downloaddiff = fulldatasize - prevdownsize;
2635 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2636 prevdownsize = fulldatasize;
2637 strbuf.write(databytes);
2638 strbuf.seek(0);
2639 returnval_content = strbuf.read();
2640 if(httpheaderout.get("Content-Encoding")=="gzip"):
2641 try:
2642 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
2643 except zlib.error:
2644 pass;
2645 if(httpheaderout.get("Content-Encoding")=="deflate"):
2646 try:
2647 returnval_content = zlib.decompress(returnval_content);
2648 except zlib.error:
2649 pass;
2650 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
2651 try:
2652 returnval_content = brotli.decompress(returnval_content);
2653 except brotli.error:
2654 pass;
2655 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
2656 geturls_text.close();
2657 return returnval;
2659 if(not havehttpcore):
2660 def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2661 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
2662 return returnval;
2664 if(havehttpcore):
2665 def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2666 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
2667 exec_time_start = time.time();
2668 myhash = hashlib.new("sha1");
2669 if(sys.version[0]=="2"):
2670 myhash.update(httpurl);
2671 myhash.update(str(buffersize));
2672 myhash.update(str(exec_time_start));
2673 if(sys.version[0]>="3"):
2674 myhash.update(httpurl.encode('utf-8'));
2675 myhash.update(str(buffersize).encode('utf-8'));
2676 myhash.update(str(exec_time_start).encode('utf-8'));
2677 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
2678 if(sleep<0):
2679 sleep = geturls_download_sleep;
2680 if(timeout<=0):
2681 timeout = 10;
2682 pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
2683 if(not pretmpfilename):
2684 return False;
2685 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
2686 tmpfilename = f.name;
2687 f.write(pretmpfilename['Content']);  # write the payload first; the utime calls below then stamp Last-Modified onto the finished file
2688 f.close();
2689 try:
2690 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2691 except AttributeError:
2692 try:
2693 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2694 except ValueError:
2695 pass;
2696 except ValueError:
2697 pass;
2698 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
2699 exec_time_end = time.time();
2700 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
2701 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
2702 return returnval;
2704 if(not havehttpcore):
2705 def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2706 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
2707 return returnval;
2709 if(havehttpcore):
2710 def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2711 global geturls_download_sleep;
2712 if(sleep<0):
2713 sleep = geturls_download_sleep;
2714 if(timeout<=0):
2715 timeout = 10;
2716 if(not outfile=="-"):
2717 outpath = outpath.rstrip(os.path.sep);
2718 filepath = os.path.realpath(outpath+os.path.sep+outfile);
2719 if(not os.path.exists(outpath)):
2720 os.makedirs(outpath);
2721 if(os.path.exists(outpath) and os.path.isfile(outpath)):
2722 return False;
2723 if(os.path.exists(filepath) and os.path.isdir(filepath)):
2724 return False;
2725 pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2726 if(not pretmpfilename):
2727 return False;
2728 tmpfilename = pretmpfilename['Filename'];
2729 downloadsize = int(os.path.getsize(tmpfilename));
2730 fulldatasize = 0;
2731 log.info("Moving file "+tmpfilename+" to "+filepath);
2732 exec_time_start = time.time();
2733 shutil.move(tmpfilename, filepath);
2734 try:
2735 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2736 except AttributeError:
2737 try:
2738 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2739 except ValueError:
2740 pass;
2741 except ValueError:
2742 pass;
2743 exec_time_end = time.time();
2744 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
2745 if(os.path.exists(tmpfilename)):
2746 os.remove(tmpfilename);
2747 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2748 if(outfile=="-"):
2749 pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
2750 tmpfilename = pretmpfilename['Filename'];
2751 downloadsize = int(os.path.getsize(tmpfilename));
2752 fulldatasize = 0;
2753 prevdownsize = 0;
2754 exec_time_start = time.time();
2755 with open(tmpfilename, 'rb') as ft:
2756 f = BytesIO();
2757 while True:
2758 databytes = ft.read(buffersize[1]);
2759 if not databytes: break;
2760 datasize = len(databytes);
2761 fulldatasize = datasize + fulldatasize;
2762 percentage = "";
2763 if(downloadsize>0):
2764 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2765 downloaddiff = fulldatasize - prevdownsize;
2766 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2767 prevdownsize = fulldatasize;
2768 f.write(databytes);
2769 f.seek(0);
2770 fdata = f.getvalue();
2771 f.close();
2772 ft.close();
2773 os.remove(tmpfilename);
2774 exec_time_end = time.time();
2775 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
2776 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
2777 return returnval;
2779 if(not havehttpcore):
2780 def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2781 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
2782 return returnval;
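# The httpcore2 variants above are the same as the httpcore ones except that
# the pools are built with http2=True. HTTP/2 support is an optional extra in
# httpcore (installed as httpcore[http2], which pulls in the h2 package), so
# these helpers assume that extra is available. Sketch (placeholder URL):
#
# result = download_from_url_with_httpcore2("https://example.com/");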
2784 if(haveurllib3):
2785 def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2786 returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
2787 return returnval;
2789 if(not haveurllib3):
2790 def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2791 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
2792 return returnval;
2794 if(haveurllib3):
2795 def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2796 returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
2797 return returnval;
2799 if(not haveurllib3):
2800 def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2801 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
2802 return returnval;
2804 if(haveurllib3):
2805 def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2806 returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
2807 return returnval;
2809 if(not haveurllib3):
2810 def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2811 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
2812 return returnval;
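# The *_with_request3 names above are thin aliases: each simply delegates to
# the matching *_with_urllib3 helper, or to the urllib fallback when urllib3
# is not installed, so callers can use either naming interchangeably.
#
# download_from_url_with_request3("https://example.com/")  # equivalent to download_from_url_with_urllib3(...)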
2814 if(haveurllib3):
2815 def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2816 global geturls_download_sleep, havebrotli;
2817 if(sleep<0):
2818 sleep = geturls_download_sleep;
2819 if(timeout<=0):
2820 timeout = 10;
2821 urlparts = urlparse.urlparse(httpurl);
2822 if(isinstance(httpheaders, list)):
2823 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
2824 httpheaders = fix_header_names(httpheaders);
2825 if(httpuseragent is not None):
2826 if('User-Agent' in httpheaders):
2827 httpheaders['User-Agent'] = httpuseragent;
2828 else:
2829 httpheaders.update({'User-Agent': httpuseragent});
2830 if(httpreferer is not None):
2831 if('Referer' in httpheaders):
2832 httpheaders['Referer'] = httpreferer;
2833 else:
2834 httpheaders.update({'Referer': httpreferer});
2835 if(urlparts.username is not None or urlparts.password is not None):
2836 if(sys.version[0]=="2"):
2837 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
2838 if(sys.version[0]>="3"):
2839 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
2840 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
2841 time.sleep(sleep);
2842 timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
2843 urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
2844 if(postdata is not None and not isinstance(postdata, dict)):
2845 postdata = urlencode(postdata);
2846 try:
2847 if(httpmethod=="GET"):
2848 geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
2849 elif(httpmethod=="POST"):
2850 geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
2851 else:
2852 geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
2853 except urllib3.exceptions.ConnectTimeoutError:
2854 log.info("Error With URL "+httpurl);
2855 return False;
2856 except urllib3.exceptions.NewConnectionError:
2857 log.info("Error With URL "+httpurl);
2858 return False;
2859 except urllib3.exceptions.MaxRetryError:
2860 log.info("Error With URL "+httpurl);
2861 return False;
2862 except socket.timeout:
2863 log.info("Error With URL "+httpurl);
2864 return False;
2865 except ValueError:
2866 log.info("Error With URL "+httpurl);
2867 return False;
2868 httpcodeout = geturls_text.status;
2869 httpcodereason = geturls_text.reason;
2870 if(geturls_text.version==10):
2871 httpversionout = "1.0";
2872 else:
2873 httpversionout = "1.1";
2874 httpmethodout = httpmethod;
2875 httpurlout = geturls_text.geturl();
2876 httpheaderout = geturls_text.info();
2877 httpheadersentout = httpheaders;
2878 if(isinstance(httpheaderout, list)):
2879 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
2880 if(sys.version[0]=="2"):
2881 try:
2882 prehttpheaderout = httpheaderout;
2883 httpheaderkeys = httpheaderout.keys();
2884 imax = len(httpheaderkeys);
2885 ic = 0;
2886 httpheaderout = {};
2887 while(ic < imax):
2888 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
2889 ic += 1;
2890 except AttributeError:
2891 pass;
2892 httpheaderout = fix_header_names(httpheaderout);
2893 if(isinstance(httpheadersentout, list)):
2894 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
2895 httpheadersentout = fix_header_names(httpheadersentout);
2896 log.info("Downloading URL "+httpurl);
2897 downloadsize = httpheaderout.get('Content-Length');
2898 if(downloadsize is not None):
2899 downloadsize = int(downloadsize);
2900 if downloadsize is None: downloadsize = 0;
2901 fulldatasize = 0;
2902 prevdownsize = 0;
2903 log.info("Downloading URL "+httpurl);
2904 with BytesIO() as strbuf:
2905 while True:
2906 databytes = geturls_text.read(buffersize);
2907 if not databytes: break;
2908 datasize = len(databytes);
2909 fulldatasize = datasize + fulldatasize;
2910 percentage = "";
2911 if(downloadsize>0):
2912 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
2913 downloaddiff = fulldatasize - prevdownsize;
2914 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
2915 prevdownsize = fulldatasize;
2916 strbuf.write(databytes);
2917 strbuf.seek(0);
2918 returnval_content = strbuf.read();
2919 if(httpheaderout.get("Content-Encoding")=="gzip"):
2920 try:
2921 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
2922 except zlib.error:
2923 pass;
2924 if(httpheaderout.get("Content-Encoding")=="deflate"):
2925 try:
2926 returnval_content = zlib.decompress(returnval_content);
2927 except zlib.error:
2928 pass;
2929 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
2930 try:
2931 returnval_content = brotli.decompress(returnval_content);
2932 except brotli.error:
2933 pass;
2934 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
2935 geturls_text.close();
2936 return returnval;
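# Usage sketch (hypothetical URL; assumes urllib3 is installed so this code path is active):
#   ret = download_from_url_with_urllib3("http://www.example.com/", httpmethod="GET");
#   if(ret):
#       print(ret['Code'], ret['Reason'], len(ret['Content']));
# The returned dict also carries 'Headers', 'Version', 'URL', and sizes under 'Contentsize'/'ContentsizeAlt'.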
2938 if(not haveurllib3):
2939 def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
2940 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
2941 return returnval;
2943 if(haveurllib3):
2944 def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2945 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
2946 exec_time_start = time.time();
2947 myhash = hashlib.new("sha1");
2948 if(sys.version[0]=="2"):
2949 myhash.update(httpurl);
2950 myhash.update(str(buffersize));
2951 myhash.update(str(exec_time_start));
2952 if(sys.version[0]>="3"):
2953 myhash.update(httpurl.encode('utf-8'));
2954 myhash.update(str(buffersize).encode('utf-8'));
2955 myhash.update(str(exec_time_start).encode('utf-8'));
2956 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
2957 if(sleep<0):
2958 sleep = geturls_download_sleep;
2959 if(timeout<=0):
2960 timeout = 10;
2961 pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
2962 if(not pretmpfilename):
2963 return False;
2964 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
2965 tmpfilename = f.name;
2966 try:
2967 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
2968 except AttributeError:
2969 try:
2970 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2971 except ValueError:
2972 pass;
2973 except ValueError:
2974 pass;
2975 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
2976 f.write(pretmpfilename['Content']);
2977 f.close();
2978 exec_time_end = time.time();
2979 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
2980 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
2981 return returnval;
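# Usage sketch (hypothetical URL): fetch to an auto-named temporary file and inspect it;
# the temp file is created with delete=False, so the caller is responsible for removing it:
#   ret = download_from_url_file_with_urllib3("http://www.example.com/file.bin");
#   if(ret):
#       print(ret['Filename'], ret['Filesize'], ret['DownloadTimeReadable']);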
2983 if(not haveurllib3):
2984 def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
2985 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
2986 return returnval;
2988 if(haveurllib3):
2989 def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
2990 global geturls_download_sleep;
2991 if(sleep<0):
2992 sleep = geturls_download_sleep;
2993 if(timeout<=0):
2994 timeout = 10;
2995 if(not outfile=="-"):
2996 outpath = outpath.rstrip(os.path.sep);
2997 filepath = os.path.realpath(outpath+os.path.sep+outfile);
2998 if(not os.path.exists(outpath)):
2999 os.makedirs(outpath);
3000 if(os.path.exists(outpath) and os.path.isfile(outpath)):
3001 return False;
3002 if(os.path.exists(filepath) and os.path.isdir(filepath)):
3003 return False;
3004 pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3005 if(not pretmpfilename):
3006 return False;
3007 tmpfilename = pretmpfilename['Filename'];
3008 downloadsize = int(os.path.getsize(tmpfilename));
3009 fulldatasize = 0;
3010 log.info("Moving file "+tmpfilename+" to "+filepath);
3011 exec_time_start = time.time();
3012 shutil.move(tmpfilename, filepath);
3013 try:
3014 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3015 except AttributeError:
3016 try:
3017 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3018 except ValueError:
3019 pass;
3020 except ValueError:
3021 pass;
3022 exec_time_end = time.time();
3023 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
3024 if(os.path.exists(tmpfilename)):
3025 os.remove(tmpfilename);
3026 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3027 if(outfile=="-"):
3028 pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3029 tmpfilename = pretmpfilename['Filename'];
3030 downloadsize = int(os.path.getsize(tmpfilename));
3031 fulldatasize = 0;
3032 prevdownsize = 0;
3033 exec_time_start = time.time();
3034 with open(tmpfilename, 'rb') as ft:
3035 f = BytesIO();
3036 while True:
3037 databytes = ft.read(buffersize[1]);
3038 if not databytes: break;
3039 datasize = len(databytes);
3040 fulldatasize = datasize + fulldatasize;
3041 percentage = "";
3042 if(downloadsize>0):
3043 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3044 downloaddiff = fulldatasize - prevdownsize;
3045 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3046 prevdownsize = fulldatasize;
3047 f.write(databytes);
3048 f.seek(0);
3049 fdata = f.getvalue();
3050 f.close();
3051 ft.close();
3052 os.remove(tmpfilename);
3053 exec_time_end = time.time();
3054 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
3055 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3056 return returnval;
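# Usage sketch (hypothetical paths): save to a named file, or pass outfile="-" to get the
# bytes back in memory instead of writing to disk:
#   ret = download_from_url_to_file_with_urllib3("http://www.example.com/file.bin", outfile="file.bin", outpath="/tmp");
#   raw = download_from_url_to_file_with_urllib3("http://www.example.com/file.bin", outfile="-");
#   if(raw):
#       data = raw['Content'];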
3058 if(not haveurllib3):
3059 def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3060 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
3061 return returnval;
3063 if(havemechanize):
3064 def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3065 global geturls_download_sleep, havebrotli;
3066 if(sleep<0):
3067 sleep = geturls_download_sleep;
3068 if(timeout<=0):
3069 timeout = 10;
3070 urlparts = urlparse.urlparse(httpurl);
3071 if(isinstance(httpheaders, list)):
3072 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
3073 httpheaders = fix_header_names(httpheaders);
3074 if(httpuseragent is not None):
3075 if('User-Agent' in httpheaders):
3076 httpheaders['User-Agent'] = httpuseragent;
3077 else:
3078 httpheaders.update({'User-Agent': httpuseragent});
3079 if(httpreferer is not None):
3080 if('Referer' in httpheaders):
3081 httpheaders['Referer'] = httpreferer;
3082 else:
3083 httpheaders.update({'Referer': httpreferer});
3084 if(urlparts.username is not None or urlparts.password is not None):
3085 if(sys.version[0]=="2"):
3086 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
3087 if(sys.version[0]>="3"):
3088 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
3089 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
3090 geturls_opener = mechanize.Browser();
3091 if(isinstance(httpheaders, dict)):
3092 httpheaders = make_http_headers_from_dict_to_list(httpheaders);
3093 time.sleep(sleep);
3094 geturls_opener.addheaders = httpheaders;
3095 geturls_opener.set_cookiejar(httpcookie);
3096 geturls_opener.set_handle_robots(False);
3097 if(postdata is not None and not isinstance(postdata, (bytes, str))):
3098 postdata = urlencode(postdata);
3099 try:
3100 if(httpmethod=="GET"):
3101 geturls_text = geturls_opener.open(httpurl, timeout=timeout);
3102 elif(httpmethod=="POST"):
3103 geturls_text = geturls_opener.open(httpurl, data=postdata, timeout=timeout);
3104 else:
3105 geturls_text = geturls_opener.open(httpurl, timeout=timeout);
3106 except mechanize.HTTPError as geturls_text_error:
3107 geturls_text = geturls_text_error;
3108 log.info("Error With URL "+httpurl);
3109 except URLError:
3110 log.info("Error With URL "+httpurl);
3111 return False;
3112 except socket.timeout:
3113 log.info("Error With URL "+httpurl);
3114 return False;
3115 httpcodeout = geturls_text.code;
3116 httpcodereason = geturls_text.msg;
3117 httpversionout = "1.1";
3118 httpmethodout = httpmethod;
3119 httpurlout = geturls_text.geturl();
3120 httpheaderout = geturls_text.info();
3121 reqhead = geturls_opener.request;
3122 httpheadersentout = reqhead.header_items();
3123 if(isinstance(httpheaderout, list)):
3124 httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
3125 if(sys.version[0]=="2"):
3126 try:
3127 prehttpheaderout = httpheaderout;
3128 httpheaderkeys = httpheaderout.keys();
3129 imax = len(httpheaderkeys);
3130 ic = 0;
3131 httpheaderout = {};
3132 while(ic < imax):
3133 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
3134 ic += 1;
3135 except AttributeError:
3136 pass;
3137 httpheaderout = fix_header_names(httpheaderout);
3138 if(isinstance(httpheadersentout, list)):
3139 httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
3140 httpheadersentout = fix_header_names(httpheadersentout);
3141 log.info("Downloading URL "+httpurl);
3142 downloadsize = httpheaderout.get('Content-Length');
3143 if(downloadsize is not None):
3144 downloadsize = int(downloadsize);
3145 if downloadsize is None: downloadsize = 0;
3146 fulldatasize = 0;
3147 prevdownsize = 0;
3148 log.info("Downloading URL "+httpurl);
3149 with BytesIO() as strbuf:
3150 while True:
3151 databytes = geturls_text.read(buffersize);
3152 if not databytes: break;
3153 datasize = len(databytes);
3154 fulldatasize = datasize + fulldatasize;
3155 percentage = "";
3156 if(downloadsize>0):
3157 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3158 downloaddiff = fulldatasize - prevdownsize;
3159 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3160 prevdownsize = fulldatasize;
3161 strbuf.write(databytes);
3162 strbuf.seek(0);
3163 returnval_content = strbuf.read();
3164 if(httpheaderout.get("Content-Encoding")=="gzip"):
3165 try:
3166 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
3167 except zlib.error:
3168 pass;
3169 if(httpheaderout.get("Content-Encoding")=="deflate"):
3170 try:
3171 returnval_content = zlib.decompress(returnval_content);
3172 except zlib.error:
3173 pass;
3174 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
3175 try:
3176 returnval_content = brotli.decompress(returnval_content);
3177 except brotli.error:
3178 pass;
3179 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
3180 geturls_text.close();
3181 return returnval;
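# Usage sketch (hypothetical form data; assumes mechanize is installed): POST works the same
# way as GET, with postdata URL-encoded automatically when it is not already a string:
#   ret = download_from_url_with_mechanize("http://www.example.com/login", httpmethod="POST", postdata={'user': "name"});
#   if(ret):
#       print(ret['Code'], ret['Headers'].get('Content-Type'));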
3183 if(not havemechanize):
3184 def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3185 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
3186 return returnval;
3188 if(havemechanize):
3189 def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
3190 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
3191 exec_time_start = time.time();
3192 myhash = hashlib.new("sha1");
3193 if(sys.version[0]=="2"):
3194 myhash.update(httpurl);
3195 myhash.update(str(buffersize));
3196 myhash.update(str(exec_time_start));
3197 if(sys.version[0]>="3"):
3198 myhash.update(httpurl.encode('utf-8'));
3199 myhash.update(str(buffersize).encode('utf-8'));
3200 myhash.update(str(exec_time_start).encode('utf-8'));
3201 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
3202 if(sleep<0):
3203 sleep = geturls_download_sleep;
3204 if(timeout<=0):
3205 timeout = 10;
3206 pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
3207 if(not pretmpfilename):
3208 return False;
3209 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
3210 tmpfilename = f.name;
3211 try:
3212 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3213 except AttributeError:
3214 try:
3215 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3216 except ValueError:
3217 pass;
3218 except ValueError:
3219 pass;
3220 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
3221 f.write(pretmpfilename['Content']);
3222 f.close();
3223 exec_time_end = time.time();
3224 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
3225 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
3226 return returnval;
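# Usage sketch (hypothetical URL): same temp-file contract as the urllib3 variant above;
# 'Filesize' is re-read from disk after the write, so it reflects the bytes actually stored:
#   ret = download_from_url_file_with_mechanize("http://www.example.com/file.bin");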
3228 if(not havemechanize):
3229 def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
3230 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
3231 return returnval;
3233 if(havemechanize):
3234 def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3235 global geturls_download_sleep;
3236 if(sleep<0):
3237 sleep = geturls_download_sleep;
3238 if(timeout<=0):
3239 timeout = 10;
3240 if(not outfile=="-"):
3241 outpath = outpath.rstrip(os.path.sep);
3242 filepath = os.path.realpath(outpath+os.path.sep+outfile);
3243 if(not os.path.exists(outpath)):
3244 os.makedirs(outpath);
3245 if(os.path.exists(outpath) and os.path.isfile(outpath)):
3246 return False;
3247 if(os.path.exists(filepath) and os.path.isdir(filepath)):
3248 return False;
3249 pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3250 if(not pretmpfilename):
3251 return False;
3252 tmpfilename = pretmpfilename['Filename'];
3253 downloadsize = int(os.path.getsize(tmpfilename));
3254 fulldatasize = 0;
3255 log.info("Moving file "+tmpfilename+" to "+filepath);
3256 exec_time_start = time.time();
3257 shutil.move(tmpfilename, filepath);
3258 try:
3259 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3260 except AttributeError:
3261 try:
3262 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3263 except ValueError:
3264 pass;
3265 except ValueError:
3266 pass;
3267 exec_time_end = time.time();
3268 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
3269 if(os.path.exists(tmpfilename)):
3270 os.remove(tmpfilename);
3271 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3272 if(outfile=="-"):
3273 pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3274 tmpfilename = pretmpfilename['Filename'];
3275 downloadsize = int(os.path.getsize(tmpfilename));
3276 fulldatasize = 0;
3277 prevdownsize = 0;
3278 exec_time_start = time.time();
3279 with open(tmpfilename, 'rb') as ft:
3280 f = BytesIO();
3281 while True:
3282 databytes = ft.read(buffersize[1]);
3283 if not databytes: break;
3284 datasize = len(databytes);
3285 fulldatasize = datasize + fulldatasize;
3286 percentage = "";
3287 if(downloadsize>0):
3288 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3289 downloaddiff = fulldatasize - prevdownsize;
3290 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3291 prevdownsize = fulldatasize;
3292 f.write(databytes);
3293 f.seek(0);
3294 fdata = f.getvalue();
3295 f.close();
3296 ft.close();
3297 os.remove(tmpfilename);
3298 exec_time_end = time.time();
3299 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
3300 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3301 return returnval;
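# Usage sketch (hypothetical paths): outpath directories are created on demand, and the call
# returns False rather than raising if the target path collides with an existing file or directory:
#   ret = download_from_url_to_file_with_mechanize("http://www.example.com/file.bin", outfile="file.bin", outpath="/tmp/downloads");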
3303 if(not havemechanize):
3304 def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3305 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
3306 return returnval;
3308 if(havepycurl):
3309 def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3310 global geturls_download_sleep, havebrotli;
3311 if(sleep<0):
3312 sleep = geturls_download_sleep;
3313 if(timeout<=0):
3314 timeout = 10;
3315 urlparts = urlparse.urlparse(httpurl);
3316 if(isinstance(httpheaders, list)):
3317 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
3318 httpheaders = fix_header_names(httpheaders);
3319 if(httpuseragent is not None):
3320 if('User-Agent' in httpheaders):
3321 httpheaders['User-Agent'] = httpuseragent;
3322 else:
3323 httpheaders.update({'User-Agent': httpuseragent});
3324 if(httpreferer is not None):
3325 if('Referer' in httpheaders):
3326 httpheaders['Referer'] = httpreferer;
3327 else:
3328 httpheaders.update({'Referer': httpreferer});
3329 if(urlparts.username is not None or urlparts.password is not None):
3330 if(sys.version[0]=="2"):
3331 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
3332 if(sys.version[0]>="3"):
3333 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
3334 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
3335 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
3336 if(isinstance(httpheaders, dict)):
3337 httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
3338 geturls_opener.addheaders = httpheaders;
3339 time.sleep(sleep);
3340 if(postdata is not None and not isinstance(postdata, (bytes, str))):
3341 postdata = urlencode(postdata);
3342 retrieved_body = BytesIO();
3343 retrieved_headers = BytesIO();
3344 try:
3345 if(httpmethod=="GET"):
3346 geturls_text = pycurl.Curl();
3347 geturls_text.setopt(geturls_text.URL, httpurl);
3348 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3349 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3350 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3351 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3352 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3353 geturls_text.perform();
3354 elif(httpmethod=="POST"):
3355 geturls_text = pycurl.Curl();
3356 geturls_text.setopt(geturls_text.URL, httpurl);
3357 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3358 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3359 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3360 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3361 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3362 geturls_text.setopt(geturls_text.POST, True);
3363 geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
3364 geturls_text.perform();
3365 else:
3366 geturls_text = pycurl.Curl();
3367 geturls_text.setopt(geturls_text.URL, httpurl);
3368 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3369 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3370 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3371 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3372 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3373 geturls_text.perform();
3374 retrieved_headers.seek(0);
3375 if(sys.version[0]=="2"):
3376 pycurlhead = retrieved_headers.read();
3377 if(sys.version[0]>="3"):
3378 pycurlhead = retrieved_headers.read().decode('UTF-8');
3379 pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ?([A-Za-z\s]*)$', pycurlhead.splitlines()[0])[0];
3380 pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
3381 retrieved_body.seek(0);
3382 except socket.timeout:
3383 log.info("Error With URL "+httpurl);
3384 return False;
3385 except socket.gaierror:
3386 log.info("Error With URL "+httpurl);
3387 return False;
3388 except ValueError:
3389 log.info("Error With URL "+httpurl);
3390 return False;
3391 httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
3392 httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
3393 httpversionout = pyhttpverinfo[0];
3394 httpmethodout = httpmethod;
3395 httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
3396 httpheaderout = pycurlheadersout;
3397 httpheadersentout = httpheaders;
3398 if(isinstance(httpheaderout, list)):
3399 httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
3400 if(sys.version[0]=="2"):
3401 try:
3402 prehttpheaderout = httpheaderout;
3403 httpheaderkeys = httpheaderout.keys();
3404 imax = len(httpheaderkeys);
3405 ic = 0;
3406 httpheaderout = {};
3407 while(ic < imax):
3408 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
3409 ic += 1;
3410 except AttributeError:
3411 pass;
3412 httpheaderout = fix_header_names(httpheaderout);
3413 if(isinstance(httpheadersentout, list)):
3414 httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
3415 httpheadersentout = fix_header_names(httpheadersentout);
3416 log.info("Downloading URL "+httpurl);
3417 downloadsize = httpheaderout.get('Content-Length');
3418 if(downloadsize is not None):
3419 downloadsize = int(downloadsize);
3420 if downloadsize is None: downloadsize = 0;
3421 fulldatasize = 0;
3422 prevdownsize = 0;
3423 log.info("Downloading URL "+httpurl);
3424 with BytesIO() as strbuf:
3425 while True:
3426 databytes = retrieved_body.read(buffersize);
3427 if not databytes: break;
3428 datasize = len(databytes);
3429 fulldatasize = datasize + fulldatasize;
3430 percentage = "";
3431 if(downloadsize>0):
3432 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3433 downloaddiff = fulldatasize - prevdownsize;
3434 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3435 prevdownsize = fulldatasize;
3436 strbuf.write(databytes);
3437 strbuf.seek(0);
3438 returnval_content = strbuf.read();
3439 if(httpheaderout.get("Content-Encoding")=="gzip"):
3440 try:
3441 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
3442 except zlib.error:
3443 pass;
3444 if(httpheaderout.get("Content-Encoding")=="deflate"):
3445 try:
3446 returnval_content = zlib.decompress(returnval_content);
3447 except zlib.error:
3448 pass;
3449 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
3450 try:
3451 returnval_content = brotli.decompress(returnval_content);
3452 except brotli.error:
3453 pass;
3454 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
3455 geturls_text.close();
3456 return returnval;
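# Usage sketch (hypothetical URL; assumes pycurl is installed): the whole body is buffered
# through WRITEFUNCTION before the progress loop re-reads it, so 'Contentsize' reflects the
# full transfer:
#   ret = download_from_url_with_pycurl("http://www.example.com/", httpmethod="GET");
#   if(ret):
#       print(ret['Version'], ret['Code'], ret['Reason']);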
3458 if(not havepycurl):
3459 def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3460 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
3461 return returnval;
3463 if(havepycurl):
3464 def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
3465 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
3466 exec_time_start = time.time();
3467 myhash = hashlib.new("sha1");
3468 if(sys.version[0]=="2"):
3469 myhash.update(httpurl);
3470 myhash.update(str(buffersize));
3471 myhash.update(str(exec_time_start));
3472 if(sys.version[0]>="3"):
3473 myhash.update(httpurl.encode('utf-8'));
3474 myhash.update(str(buffersize).encode('utf-8'));
3475 myhash.update(str(exec_time_start).encode('utf-8'));
3476 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
3477 if(sleep<0):
3478 sleep = geturls_download_sleep;
3479 if(timeout<=0):
3480 timeout = 10;
3481 pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
3482 if(not pretmpfilename):
3483 return False;
3484 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
3485 tmpfilename = f.name;
3486 try:
3487 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3488 except AttributeError:
3489 try:
3490 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3491 except ValueError:
3492 pass;
3493 except ValueError:
3494 pass;
3495 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
3496 f.write(pretmpfilename['Content']);
3497 f.close();
3498 exec_time_end = time.time();
3499 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
3500 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
3501 return returnval;
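# Usage sketch (hypothetical URL): temp-file variant of the pycurl downloader; the SHA-1 of
# URL + buffer size + start time keeps concurrent temp files from colliding:
#   ret = download_from_url_file_with_pycurl("http://www.example.com/file.bin");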
3503 if(not havepycurl):
3504 def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
3505 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
3506 return returnval;
3508 if(havepycurl):
3509 def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3510 global geturls_download_sleep;
3511 if(sleep<0):
3512 sleep = geturls_download_sleep;
3513 if(timeout<=0):
3514 timeout = 10;
3515 if(not outfile=="-"):
3516 outpath = outpath.rstrip(os.path.sep);
3517 filepath = os.path.realpath(outpath+os.path.sep+outfile);
3518 if(not os.path.exists(outpath)):
3519 os.makedirs(outpath);
3520 if(os.path.exists(outpath) and os.path.isfile(outpath)):
3521 return False;
3522 if(os.path.exists(filepath) and os.path.isdir(filepath)):
3523 return False;
3524 pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3525 if(not pretmpfilename):
3526 return False;
3527 tmpfilename = pretmpfilename['Filename'];
3528 downloadsize = int(os.path.getsize(tmpfilename));
3529 fulldatasize = 0;
3530 log.info("Moving file "+tmpfilename+" to "+filepath);
3531 exec_time_start = time.time();
3532 shutil.move(tmpfilename, filepath);
3533 try:
3534 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3535 except AttributeError:
3536 try:
3537 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3538 except ValueError:
3539 pass;
3540 except ValueError:
3541 pass;
3542 exec_time_end = time.time();
3543 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
3544 if(os.path.exists(tmpfilename)):
3545 os.remove(tmpfilename);
3546 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3547 if(outfile=="-"):
3548 pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3549 tmpfilename = pretmpfilename['Filename'];
3550 downloadsize = int(os.path.getsize(tmpfilename));
3551 fulldatasize = 0;
3552 prevdownsize = 0;
3553 exec_time_start = time.time();
3554 with open(tmpfilename, 'rb') as ft:
3555 f = BytesIO();
3556 while True:
3557 databytes = ft.read(buffersize[1]);
3558 if not databytes: break;
3559 datasize = len(databytes);
3560 fulldatasize = datasize + fulldatasize;
3561 percentage = "";
3562 if(downloadsize>0):
3563 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3564 downloaddiff = fulldatasize - prevdownsize;
3565 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3566 prevdownsize = fulldatasize;
3567 f.write(databytes);
3568 f.seek(0);
3569 fdata = f.getvalue();
3570 f.close();
3571 ft.close();
3572 os.remove(tmpfilename);
3573 exec_time_end = time.time();
3574 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
3575 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3576 return returnval;
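# Usage sketch (hypothetical paths): pycurl-backed save-to-file; outfile="-" returns the
# bytes in memory, mirroring the urllib3 and mechanize variants above:
#   ret = download_from_url_to_file_with_pycurl("http://www.example.com/file.bin", outfile="file.bin", outpath="/tmp");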
3578 if(not havepycurl):
3579 def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3580 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
3581 return returnval;
3583 if(havepycurl):
3584 def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3585 global geturls_download_sleep, havebrotli;
3586 if(sleep<0):
3587 sleep = geturls_download_sleep;
3588 if(timeout<=0):
3589 timeout = 10;
3590 urlparts = urlparse.urlparse(httpurl);
3591 if(isinstance(httpheaders, list)):
3592 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
3593 httpheaders = fix_header_names(httpheaders);
3594 if(httpuseragent is not None):
3595 if('User-Agent' in httpheaders):
3596 httpheaders['User-Agent'] = httpuseragent;
3597 else:
3598 httpheaders.update({'User-Agent': httpuseragent});
3599 if(httpreferer is not None):
3600 if('Referer' in httpheaders):
3601 httpheaders['Referer'] = httpreferer;
3602 else:
3603 httpheaders.update({'Referer': httpreferer});
3604 if(urlparts.username is not None or urlparts.password is not None):
3605 if(sys.version[0]=="2"):
3606 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
3607 if(sys.version[0]>="3"):
3608 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
3609 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
3610 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
3611 if(isinstance(httpheaders, dict)):
3612 httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
3613 geturls_opener.addheaders = httpheaders;
3614 time.sleep(sleep);
3615 if(postdata is not None and not isinstance(postdata, (bytes, str))):
3616 postdata = urlencode(postdata);
3617 retrieved_body = BytesIO();
3618 retrieved_headers = BytesIO();
3619 try:
3620 if(httpmethod=="GET"):
3621 geturls_text = pycurl.Curl();
3622 geturls_text.setopt(geturls_text.URL, httpurl);
3623 geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
3624 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3625 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3626 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3627 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3628 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3629 geturls_text.perform();
3630 elif(httpmethod=="POST"):
3631 geturls_text = pycurl.Curl();
3632 geturls_text.setopt(geturls_text.URL, httpurl);
3633 geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
3634 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3635 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3636 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3637 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3638 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3639 geturls_text.setopt(geturls_text.POST, True);
3640 geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
3641 geturls_text.perform();
3642 else:
3643 geturls_text = pycurl.Curl();
3644 geturls_text.setopt(geturls_text.URL, httpurl);
3645 geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
3646 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3647 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3648 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3649 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3650 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3651 geturls_text.perform();
3652 retrieved_headers.seek(0);
3653 if(sys.version[0]=="2"):
3654 pycurlhead = retrieved_headers.read();
3655 if(sys.version[0]>="3"):
3656 pycurlhead = retrieved_headers.read().decode('UTF-8');
3657 pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ?([A-Za-z\s]*)$', pycurlhead.splitlines()[0])[0];
3658 pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
3659 retrieved_body.seek(0);
3660 except socket.timeout:
3661 log.info("Error With URL "+httpurl);
3662 return False;
3663 except socket.gaierror:
3664 log.info("Error With URL "+httpurl);
3665 return False;
3666 except ValueError:
3667 log.info("Error With URL "+httpurl);
3668 return False;
3669 httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
3670 httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
3671 httpversionout = pyhttpverinfo[0];
3672 httpmethodout = httpmethod;
3673 httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
3674 httpheaderout = pycurlheadersout;
3675 httpheadersentout = httpheaders;
3676 if(isinstance(httpheaderout, list)):
3677 httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
3678 if(sys.version[0]=="2"):
3679 try:
3680 prehttpheaderout = httpheaderout;
3681 httpheaderkeys = httpheaderout.keys();
3682 imax = len(httpheaderkeys);
3683 ic = 0;
3684 httpheaderout = {};
3685 while(ic < imax):
3686 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
3687 ic += 1;
3688 except AttributeError:
3689 pass;
3690 httpheaderout = fix_header_names(httpheaderout);
3691 if(isinstance(httpheadersentout, list)):
3692 httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
3693 httpheadersentout = fix_header_names(httpheadersentout);
3694 log.info("Downloading URL "+httpurl);
3695 downloadsize = httpheaderout.get('Content-Length');
3696 if(downloadsize is not None):
3697 downloadsize = int(downloadsize);
3698 if downloadsize is None: downloadsize = 0;
3699 fulldatasize = 0;
3700 prevdownsize = 0;
3701 log.info("Downloading URL "+httpurl);
3702 with BytesIO() as strbuf:
3703 while True:
3704 databytes = retrieved_body.read(buffersize);
3705 if not databytes: break;
3706 datasize = len(databytes);
3707 fulldatasize = datasize + fulldatasize;
3708 percentage = "";
3709 if(downloadsize>0):
3710 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3711 downloaddiff = fulldatasize - prevdownsize;
3712 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3713 prevdownsize = fulldatasize;
3714 strbuf.write(databytes);
3715 strbuf.seek(0);
3716 returnval_content = strbuf.read();
3717 if(httpheaderout.get("Content-Encoding")=="gzip"):
3718 try:
3719 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
3720 except zlib.error:
3721 pass;
3722 if(httpheaderout.get("Content-Encoding")=="deflate"):
3723 try:
3724 returnval_content = zlib.decompress(returnval_content);
3725 except zlib.error:
3726 pass;
3727 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
3728 try:
3729 returnval_content = brotli.decompress(returnval_content);
3730 except brotli.error:
3731 pass;
3732 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
3733 geturls_text.close();
3734 return returnval;
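# Usage sketch (hypothetical URL): identical to download_from_url_with_pycurl except that
# the HTTP_VERSION option is pinned to CURL_HTTP_VERSION_2_0, which typically requires a
# libcurl built with HTTP/2 (nghttp2) support:
#   ret = download_from_url_with_pycurl2("http://www.example.com/");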
3736 if(not havepycurl):
3737 def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3738 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
3739 return returnval;
3741 if(havepycurl):
3742 def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
3743 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
3744 exec_time_start = time.time();
3745 myhash = hashlib.new("sha1");
3746 if(sys.version[0]=="2"):
3747 myhash.update(httpurl);
3748 myhash.update(str(buffersize));
3749 myhash.update(str(exec_time_start));
3750 if(sys.version[0]>="3"):
3751 myhash.update(httpurl.encode('utf-8'));
3752 myhash.update(str(buffersize).encode('utf-8'));
3753 myhash.update(str(exec_time_start).encode('utf-8'));
3754 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
3755 if(sleep<0):
3756 sleep = geturls_download_sleep;
3757 if(timeout<=0):
3758 timeout = 10;
3759 pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
3760 if(not pretmpfilename):
3761 return False;
3762 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
3763 tmpfilename = f.name;
3764 try:
3765 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3766 except AttributeError:
3767 try:
3768 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3769 except ValueError:
3770 pass;
3771 except ValueError:
3772 pass;
3773 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
3774 f.write(pretmpfilename['Content']);
3775 f.close();
3776 exec_time_end = time.time();
3777 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
3778 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
3779 return returnval;
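# Usage sketch (hypothetical URL): HTTP/2 temp-file variant; falls back to the plain urllib
# downloader when pycurl is unavailable (see the guard below):
#   ret = download_from_url_file_with_pycurl2("http://www.example.com/file.bin");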
3781 if(not havepycurl):
3782 def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
3783 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
3784 return returnval;
3786 if(havepycurl):
3787 def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3788 global geturls_download_sleep;
3789 if(sleep<0):
3790 sleep = geturls_download_sleep;
3791 if(timeout<=0):
3792 timeout = 10;
3793 if(not outfile=="-"):
3794 outpath = outpath.rstrip(os.path.sep);
3795 filepath = os.path.realpath(outpath+os.path.sep+outfile);
3796 if(not os.path.exists(outpath)):
3797 os.makedirs(outpath);
3798 if(os.path.exists(outpath) and os.path.isfile(outpath)):
3799 return False;
3800 if(os.path.exists(filepath) and os.path.isdir(filepath)):
3801 return False;
3802 pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3803 if(not pretmpfilename):
3804 return False;
3805 tmpfilename = pretmpfilename['Filename'];
3806 downloadsize = int(os.path.getsize(tmpfilename));
3807 fulldatasize = 0;
3808 log.info("Moving file "+tmpfilename+" to "+filepath);
3809 exec_time_start = time.time();
3810 shutil.move(tmpfilename, filepath);
3811 try:
3812 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
3813 except AttributeError:
3814 try:
3815 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3816 except ValueError:
3817 pass;
3818 except ValueError:
3819 pass;
3820 exec_time_end = time.time();
3821 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
3822 if(os.path.exists(tmpfilename)):
3823 os.remove(tmpfilename);
3824 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3825 if(outfile=="-"):
3826 pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
3827 tmpfilename = pretmpfilename['Filename'];
3828 downloadsize = int(os.path.getsize(tmpfilename));
3829 fulldatasize = 0;
3830 prevdownsize = 0;
3831 exec_time_start = time.time();
3832 with open(tmpfilename, 'rb') as ft:
3833 f = BytesIO();
3834 while True:
3835 databytes = ft.read(buffersize[1]);
3836 if not databytes: break;
3837 datasize = len(databytes);
3838 fulldatasize = datasize + fulldatasize;
3839 percentage = "";
3840 if(downloadsize>0):
3841 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3842 downloaddiff = fulldatasize - prevdownsize;
3843 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3844 prevdownsize = fulldatasize;
3845 f.write(databytes);
3846 f.seek(0);
3847 fdata = f.getvalue();
3848 f.close();
3849 ft.close();
3850 os.remove(tmpfilename);
3851 exec_time_end = time.time();
3852 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
3853 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
3854 return returnval;
3856 if(not havepycurl):
3857 def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
3858 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
3859 return returnval;
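# Illustrative usage sketch (added commentary, not part of the original API):
# fetch a URL straight to disk with the pycurl2 wrapper; when pycurl is not
# installed the fallback definition above transparently uses urllib instead.
# The URL and file names are placeholders, not endpoints the project ships with.
def _example_download_to_file_with_pycurl2():
    return download_from_url_to_file_with_pycurl2("https://www.example.com/", outfile="example.html", outpath=os.getcwd());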
3861 if(havepycurl):
3862 def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
3863 global geturls_download_sleep, havebrotli;
3864 if(sleep<0):
3865 sleep = geturls_download_sleep;
3866 if(timeout<=0):
3867 timeout = 10;
3868 urlparts = urlparse.urlparse(httpurl);
3869 if(isinstance(httpheaders, list)):
3870 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
3871 httpheaders = fix_header_names(httpheaders);
3872 if(httpuseragent is not None):
3873 if('User-Agent' in httpheaders):
3874 httpheaders['User-Agent'] = httpuseragent;
3875 else:
3876 httpheaders.update({'User-Agent': httpuseragent});
3877 if(httpreferer is not None):
3878 if('Referer' in httpheaders):
3879 httpheaders['Referer'] = httpreferer;
3880 else:
3881 httpheaders.update({'Referer': httpreferer});
3882 if(urlparts.username is not None or urlparts.password is not None):
3883 if(sys.version[0]=="2"):
3884 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
3885 if(sys.version[0]>="3"):
3886 inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
3887 httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
3888 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
3889 if(isinstance(httpheaders, dict)):
3890 httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
3891 geturls_opener.addheaders = httpheaders;
3892 time.sleep(sleep);
3893 if(postdata is not None and not isinstance(postdata, dict)):
3894 postdata = urlencode(postdata);
3895 retrieved_body = BytesIO();
3896 retrieved_headers = BytesIO();
3897 try:
3898 if(httpmethod=="GET"):
3899 geturls_text = pycurl.Curl();
3900 geturls_text.setopt(geturls_text.URL, httpurl);
3901 geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3);
3902 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3903 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3904 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3905 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3906 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3907 geturls_text.perform();
3908 elif(httpmethod=="POST"):
3909 geturls_text = pycurl.Curl();
3910 geturls_text.setopt(geturls_text.URL, httpurl);
3911 geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3);
3912 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3913 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3914 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3915 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3916 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3917 geturls_text.setopt(geturls_text.POST, True);
3918 geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
3919 geturls_text.perform();
3920 else:
3921 geturls_text = pycurl.Curl();
3922 geturls_text.setopt(geturls_text.URL, httpurl);
3923 geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3);
3924 geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
3925 geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
3926 geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
3927 geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
3928 geturls_text.setopt(geturls_text.TIMEOUT, timeout);
3929 geturls_text.perform();
3930 retrieved_headers.seek(0);
3931 if(sys.version[0]=="2"):
3932 pycurlhead = retrieved_headers.read();
3933 if(sys.version[0]>="3"):
3934 pycurlhead = retrieved_headers.read().decode('UTF-8');
3935 pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0];
3936 pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
3937 retrieved_body.seek(0);
3938 except socket.timeout:
3939 log.info("Error With URL "+httpurl);
3940 return False;
3941 except socket.gaierror:
3942 log.info("Error With URL "+httpurl);
3943 return False;
3944 except ValueError:
3945 log.info("Error With URL "+httpurl);
3946 return False;
3947 httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
3948 httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
3949 httpversionout = pyhttpverinfo[0];
3950 httpmethodout = httpmethod;
3951 httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
3952 httpheaderout = pycurlheadersout;
3953 httpheadersentout = httpheaders;
3954 if(isinstance(httpheaderout, list)):
3955 httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
3956 if(sys.version[0]=="2"):
3957 try:
3958 prehttpheaderout = httpheaderout;
3959 httpheaderkeys = httpheaderout.keys();
3960 imax = len(httpheaderkeys);
3961 ic = 0;
3962 httpheaderout = {};
3963 while(ic < imax):
3964 httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
3965 ic += 1;
3966 except AttributeError:
3967 pass;
3968 httpheaderout = fix_header_names(httpheaderout);
3969 if(isinstance(httpheadersentout, list)):
3970 httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
3971 httpheadersentout = fix_header_names(httpheadersentout);
3972 log.info("Downloading URL "+httpurl);
3973 downloadsize = httpheaderout.get('Content-Length');
3974 if(downloadsize is not None):
3975 downloadsize = int(downloadsize);
3976 if downloadsize is None: downloadsize = 0;
3977 fulldatasize = 0;
3978 prevdownsize = 0;
3979 log.info("Downloading URL "+httpurl);
3980 with BytesIO() as strbuf:
3981 while True:
3982 databytes = retrieved_body.read(buffersize);
3983 if not databytes: break;
3984 datasize = len(databytes);
3985 fulldatasize = datasize + fulldatasize;
3986 percentage = "";
3987 if(downloadsize>0):
3988 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
3989 downloaddiff = fulldatasize - prevdownsize;
3990 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
3991 prevdownsize = fulldatasize;
3992 strbuf.write(databytes);
3993 strbuf.seek(0);
3994 returnval_content = strbuf.read();
3995 if(httpheaderout.get("Content-Encoding")=="gzip"):
3996 try:
3997 returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
3998 except zlib.error:
3999 pass;
4000 if(httpheaderout.get("Content-Encoding")=="deflate"):
4001 try:
4002 returnval_content = zlib.decompress(returnval_content);
4003 except zlib.error:
4004 pass;
4005 if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
4006 try:
4007 returnval_content = brotli.decompress(returnval_content);
4008 except brotli.error:
4009 pass;
4010 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
4011 geturls_text.close();
4012 return returnval;
4014 if(not havepycurl):
4015 def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
4016 returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
4017 return returnval;
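# Illustrative usage sketch (assumption: a libcurl build with HTTP/3 support,
# since the pycurl3 variant requests CURL_HTTP_VERSION_3). Returns the decoded
# body bytes, or False on failure; the URL is a placeholder.
def _example_fetch_content_with_pycurl3():
    result = download_from_url_with_pycurl3("https://www.example.com/");
    if(not result):
        return False;
    return result['Content'];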
4019 if(havepycurl):
4020 def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4021 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
4022 exec_time_start = time.time();
4023 myhash = hashlib.new("sha1");
4024 if(sys.version[0]=="2"):
4025 myhash.update(httpurl);
4026 myhash.update(str(buffersize));
4027 myhash.update(str(exec_time_start));
4028 if(sys.version[0]>="3"):
4029 myhash.update(httpurl.encode('utf-8'));
4030 myhash.update(str(buffersize).encode('utf-8'));
4031 myhash.update(str(exec_time_start).encode('utf-8'));
4032 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
4033 if(sleep<0):
4034 sleep = geturls_download_sleep;
4035 if(timeout<=0):
4036 timeout = 10;
4037 pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
4038 if(not pretmpfilename):
4039 return False;
4040 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
4041 tmpfilename = f.name;
4042 try:
4043 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
4044 except AttributeError:
4045 try:
4046 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4047 except ValueError:
4048 pass;
4049 except ValueError:
4050 pass;
4051 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
4052 f.write(pretmpfilename['Content']);
4053 f.close();
4054 exec_time_end = time.time();
4055 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
4056 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
4057 return returnval;
4059 if(not havepycurl):
4060 def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4061 returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
4062 return returnval;
4064 if(havepycurl):
4065 def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4066 global geturls_download_sleep;
4067 if(sleep<0):
4068 sleep = geturls_download_sleep;
4069 if(timeout<=0):
4070 timeout = 10;
4071 if(not outfile=="-"):
4072 outpath = outpath.rstrip(os.path.sep);
4073 filepath = os.path.realpath(outpath+os.path.sep+outfile);
4074 if(not os.path.exists(outpath)):
4075 os.makedirs(outpath);
4076 if(os.path.exists(outpath) and os.path.isfile(outpath)):
4077 return False;
4078 if(os.path.exists(filepath) and os.path.isdir(filepath)):
4079 return False;
4080 pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4081 if(not pretmpfilename):
4082 return False;
4083 tmpfilename = pretmpfilename['Filename'];
4084 downloadsize = int(os.path.getsize(tmpfilename));
4085 fulldatasize = 0;
4086 log.info("Moving file "+tmpfilename+" to "+filepath);
4087 exec_time_start = time.time();
4088 shutil.move(tmpfilename, filepath);
4089 try:
4090 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
4091 except AttributeError:
4092 try:
4093 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4094 except ValueError:
4095 pass;
4096 except ValueError:
4097 pass;
4098 exec_time_end = time.time();
4099 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
4100 if(os.path.exists(tmpfilename)):
4101 os.remove(tmpfilename);
4102 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4103 if(outfile=="-"):
4104 pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4105 tmpfilename = pretmpfilename['Filename'];
4106 downloadsize = int(os.path.getsize(tmpfilename));
4107 fulldatasize = 0;
4108 prevdownsize = 0;
4109 exec_time_start = time.time();
4110 with open(tmpfilename, 'rb') as ft:
4111 f = BytesIO();
4112 while True:
4113 databytes = ft.read(buffersize[1]);
4114 if not databytes: break;
4115 datasize = len(databytes);
4116 fulldatasize = datasize + fulldatasize;
4117 percentage = "";
4118 if(downloadsize>0):
4119 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4120 downloaddiff = fulldatasize - prevdownsize;
4121 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4122 prevdownsize = fulldatasize;
4123 f.write(databytes);
4124 f.seek(0);
4125 fdata = f.getvalue();
4126 f.close();
4127 ft.close();
4128 os.remove(tmpfilename);
4129 exec_time_end = time.time();
4130 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
4131 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4132 return returnval;
4134 if(not havepycurl):
4135 def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4136 returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
4137 return returnval;
4139 def download_file_from_ftp_file(url):
4140 urlparts = urlparse.urlparse(url);
4141 file_name = os.path.basename(urlparts.path);
4142 file_dir = os.path.dirname(urlparts.path);
4143 if(urlparts.username is not None):
4144 ftp_username = urlparts.username;
4145 else:
4146 ftp_username = "anonymous";
4147 if(urlparts.password is not None):
4148 ftp_password = urlparts.password;
4149 elif(urlparts.password is None and urlparts.username=="anonymous"):
4150 ftp_password = "anonymous";
4151 else:
4152 ftp_password = "";
4153 if(urlparts.scheme=="ftp"):
4154 ftp = FTP();
4155 elif(urlparts.scheme=="ftps"):
4156 ftp = FTP_TLS();
4157 else:
4158 return False;
4159 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
4160 return False;
4161 ftp_port = urlparts.port;
4162 if(urlparts.port is None):
4163 ftp_port = 21;
4164 try:
4165 ftp.connect(urlparts.hostname, ftp_port);
4166 except socket.gaierror:
4167 log.info("Error With URL "+url);
4168 return False;
4169 except socket.timeout:
4170 log.info("Error With URL "+url);
4171 return False;
4172 ftp.login(ftp_username, ftp_password);
4173 if(urlparts.scheme=="ftps"):
4174 ftp.prot_p();
4175 ftpfile = BytesIO();
4176 ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
4177 #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
4178 ftp.close();
4179 ftpfile.seek(0, 0);
4180 return ftpfile;
4182 def download_file_from_ftp_string(url):
4183 ftpfile = download_file_from_ftp_file(url);
4184 return ftpfile.read() if ftpfile else False;
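# Illustrative usage sketch (host, credentials, and path are placeholders):
# the _file variant yields a seekable BytesIO, the _string variant raw bytes.
def _example_fetch_ftp_bytes():
    return download_file_from_ftp_string("ftp://anonymous:anonymous@ftp.example.com/pub/README");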
4186 def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
4187 global geturls_download_sleep, havebrotli;
4188 if(sleep<0):
4189 sleep = geturls_download_sleep;
4190 if(timeout<=0):
4191 timeout = 10;
4192 urlparts = urlparse.urlparse(httpurl);
4193 if(isinstance(httpheaders, list)):
4194 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
4195 httpheaders = fix_header_names(httpheaders);
4196 if(httpuseragent is not None):
4197 if('User-Agent' in httpheaders):
4198 httpheaders['User-Agent'] = httpuseragent;
4199 else:
4200 httpheaders.update({'User-Agent': httpuseragent});
4201 if(httpreferer is not None):
4202 if('Referer' in httpheaders):
4203 httpheaders['Referer'] = httpreferer;
4204 else:
4205 httpheaders.update({'Referer': httpreferer});
4206 if(isinstance(httpheaders, dict)):
4207 httpheaders = make_http_headers_from_dict_to_list(httpheaders);
4208 time.sleep(sleep);
4209 geturls_text = download_file_from_ftp_file(httpurl);
4210 if(not geturls_text):
4211 return False;
4212 downloadsize = None;
4213 if(downloadsize is not None):
4214 downloadsize = int(downloadsize);
4215 if downloadsize is None: downloadsize = 0;
4216 fulldatasize = 0;
4217 prevdownsize = 0;
4218 log.info("Downloading URL "+httpurl);
4219 with BytesIO() as strbuf:
4220 while True:
4221 databytes = geturls_text.read(buffersize);
4222 if not databytes: break;
4223 datasize = len(databytes);
4224 fulldatasize = datasize + fulldatasize;
4225 percentage = "";
4226 if(downloadsize>0):
4227 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4228 downloaddiff = fulldatasize - prevdownsize;
4229 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4230 prevdownsize = fulldatasize;
4231 strbuf.write(databytes);
4232 strbuf.seek(0);
4233 returnval_content = strbuf.read();
4234 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None, 'Reason': None};
4235 geturls_text.close();
4236 return returnval;
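# Illustrative usage sketch: the higher-level FTP wrapper wraps the BytesIO
# from download_file_from_ftp_file in the same result-dict shape the HTTP
# backends return ('Type', 'Content', 'Contentsize', ...). Placeholder URL.
def _example_download_dict_with_ftp():
    result = download_from_url_with_ftp("ftp://anonymous:anonymous@ftp.example.com/pub/README");
    if(not result):
        return False;
    return result['Contentsize'];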
4238 def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4239 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
4240 exec_time_start = time.time();
4241 myhash = hashlib.new("sha1");
4242 if(sys.version[0]=="2"):
4243 myhash.update(httpurl);
4244 myhash.update(str(buffersize));
4245 myhash.update(str(exec_time_start));
4246 if(sys.version[0]>="3"):
4247 myhash.update(httpurl.encode('utf-8'));
4248 myhash.update(str(buffersize).encode('utf-8'));
4249 myhash.update(str(exec_time_start).encode('utf-8'));
4250 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
4251 if(sleep<0):
4252 sleep = geturls_download_sleep;
4253 if(timeout<=0):
4254 timeout = 10;
4255 pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
4256 if(not pretmpfilename):
4257 return False;
4258 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
4259 tmpfilename = f.name;
4260 try:
4261 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
4262 except AttributeError:
4263 try:
4264 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4265 except ValueError:
4266 pass;
4267 except ValueError:
4268 pass;
4269 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
4270 f.write(pretmpfilename['Content']);
4271 f.close();
4272 exec_time_end = time.time();
4273 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
4274 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
4275 return returnval;
4277 def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4278 global geturls_download_sleep;
4279 if(sleep<0):
4280 sleep = geturls_download_sleep;
4281 if(timeout<=0):
4282 timeout = 10;
4283 if(not outfile=="-"):
4284 outpath = outpath.rstrip(os.path.sep);
4285 filepath = os.path.realpath(outpath+os.path.sep+outfile);
4286 if(not os.path.exists(outpath)):
4287 os.makedirs(outpath);
4288 if(os.path.exists(outpath) and os.path.isfile(outpath)):
4289 return False;
4290 if(os.path.exists(filepath) and os.path.isdir(filepath)):
4291 return False;
4292 pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4293 if(not pretmpfilename):
4294 return False;
4295 tmpfilename = pretmpfilename['Filename'];
4296 downloadsize = int(os.path.getsize(tmpfilename));
4297 fulldatasize = 0;
4298 log.info("Moving file "+tmpfilename+" to "+filepath);
4299 exec_time_start = time.time();
4300 shutil.move(tmpfilename, filepath);
4301 exec_time_end = time.time();
4302 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
4303 if(os.path.exists(tmpfilename)):
4304 os.remove(tmpfilename);
4305 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4306 if(outfile=="-"):
4307 pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4308 tmpfilename = pretmpfilename['Filename'];
4309 downloadsize = int(os.path.getsize(tmpfilename));
4310 fulldatasize = 0;
4311 prevdownsize = 0;
4312 exec_time_start = time.time();
4313 with open(tmpfilename, 'rb') as ft:
4314 f = BytesIO();
4315 while True:
4316 databytes = ft.read(buffersize[1]);
4317 if not databytes: break;
4318 datasize = len(databytes);
4319 fulldatasize = datasize + fulldatasize;
4320 percentage = "";
4321 if(downloadsize>0):
4322 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4323 downloaddiff = fulldatasize - prevdownsize;
4324 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4325 prevdownsize = fulldatasize;
4326 f.write(databytes);
4327 f.seek(0);
4328 fdata = f.getvalue();
4329 f.close();
4330 ft.close();
4331 os.remove(tmpfilename);
4332 exec_time_end = time.time();
4333 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
4334 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4335 return returnval;
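# Illustrative usage sketch: save an FTP file locally via the to-file wrapper;
# buffersize takes a [download, copy] pair. All names are placeholders.
def _example_download_ftp_to_file():
    return download_from_url_to_file_with_ftp("ftp://anonymous:anonymous@ftp.example.com/pub/README", outfile="README", outpath=os.getcwd(), buffersize=[524288, 524288]);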
4337 def upload_file_to_ftp_file(ftpfile, url):
4338 urlparts = urlparse.urlparse(url);
4339 file_name = os.path.basename(urlparts.path);
4340 file_dir = os.path.dirname(urlparts.path);
4341 if(urlparts.username is not None):
4342 ftp_username = urlparts.username;
4343 else:
4344 ftp_username = "anonymous";
4345 if(urlparts.password is not None):
4346 ftp_password = urlparts.password;
4347 elif(urlparts.password is None and urlparts.username=="anonymous"):
4348 ftp_password = "anonymous";
4349 else:
4350 ftp_password = "";
4351 if(urlparts.scheme=="ftp"):
4352 ftp = FTP();
4353 elif(urlparts.scheme=="ftps"):
4354 ftp = FTP_TLS();
4355 else:
4356 return False;
4357 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
4358 return False;
4359 ftp_port = urlparts.port;
4360 if(urlparts.port is None):
4361 ftp_port = 21;
4362 try:
4363 ftp.connect(urlparts.hostname, ftp_port);
4364 except socket.gaierror:
4365 log.info("Error With URL "+url);
4366 return False;
4367 except socket.timeout:
4368 log.info("Error With URL "+url);
4369 return False;
4370 ftp.login(ftp_username, ftp_password);
4371 if(urlparts.scheme=="ftps"):
4372 ftp.prot_p();
4373 ftp.storbinary("STOR "+urlparts.path, ftpfile);
4374 ftp.close();
4375 ftpfile.seek(0, 0);
4376 return ftpfile;
4378 def upload_file_to_ftp_string(ftpstring, url):
4379 ftpfileo = BytesIO(ftpstring);
4380 ftpfile = upload_file_to_ftp_file(ftpfileo, url);
4381 ftpfileo.close();
4382 return ftpfile;
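# Illustrative usage sketch (placeholder credentials and path): upload raw
# bytes over FTP; returns the file object on success or False on failure.
def _example_upload_ftp_bytes():
    return upload_file_to_ftp_string(b"hello world", "ftp://user:password@ftp.example.com/upload/hello.txt");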
4384 if(haveparamiko):
4385 def download_file_from_sftp_file(url):
4386 urlparts = urlparse.urlparse(url);
4387 file_name = os.path.basename(urlparts.path);
4388 file_dir = os.path.dirname(urlparts.path);
4389 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
4390 return False;
4391 sftp_port = urlparts.port;
4392 if(urlparts.port is None):
4393 sftp_port = 22;
4394 else:
4395 sftp_port = urlparts.port;
4396 if(urlparts.username is not None):
4397 sftp_username = urlparts.username;
4398 else:
4399 sftp_username = "anonymous";
4400 if(urlparts.password is not None):
4401 sftp_password = urlparts.password;
4402 elif(urlparts.password is None and urlparts.username=="anonymous"):
4403 sftp_password = "anonymous";
4404 else:
4405 sftp_password = "";
4406 if(urlparts.scheme!="sftp"):
4407 return False;
4408 ssh = paramiko.SSHClient();
4409 ssh.load_system_host_keys();
4410 ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
4411 try:
4412 ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
4413 except paramiko.ssh_exception.SSHException:
4414 return False;
4415 except socket.gaierror:
4416 log.info("Error With URL "+url);
4417 return False;
4418 except socket.timeout:
4419 log.info("Error With URL "+url);
4420 return False;
4421 sftp = ssh.open_sftp();
4422 sftpfile = BytesIO();
4423 sftp.getfo(urlparts.path, sftpfile);
4424 sftp.close();
4425 ssh.close();
4426 sftpfile.seek(0, 0);
4427 return sftpfile;
4428 else:
4429 def download_file_from_sftp_file(url):
4430 return False;
4432 if(haveparamiko):
4433 def download_file_from_sftp_string(url):
4434 sftpfile = download_file_from_sftp_file(url);
4435 return sftpfile.read() if sftpfile else False;
4436 else:
4437 def download_file_from_sftp_string(url):
4438 return False;
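# Illustrative usage sketch (requires paramiko; placeholder host/credentials):
# fetch a remote file over SFTP as raw bytes, or False when unavailable.
def _example_fetch_sftp_bytes():
    if(not haveparamiko):
        return False;
    return download_file_from_sftp_string("sftp://user:password@sftp.example.com/home/user/file.txt");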
4440 if(haveparamiko):
4441 def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
4442 global geturls_download_sleep, havebrotli;
4443 if(sleep<0):
4444 sleep = geturls_download_sleep;
4445 if(timeout<=0):
4446 timeout = 10;
4447 urlparts = urlparse.urlparse(httpurl);
4448 if(isinstance(httpheaders, list)):
4449 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
4450 httpheaders = fix_header_names(httpheaders);
4451 if(httpuseragent is not None):
4452 if('User-Agent' in httpheaders):
4453 httpheaders['User-Agent'] = httpuseragent;
4454 else:
4455 httpheaders.update({'User-Agent': httpuseragent});
4456 if(httpreferer is not None):
4457 if('Referer' in httpheaders):
4458 httpheaders['Referer'] = httpreferer;
4459 else:
4460 httpheaders.update({'Referer': httpreferer});
4461 if(isinstance(httpheaders, dict)):
4462 httpheaders = make_http_headers_from_dict_to_list(httpheaders);
4463 time.sleep(sleep);
4464 geturls_text = download_file_from_sftp_file(httpurl);
4465 if(not geturls_text):
4466 return False;
4467 downloadsize = None;
4468 if(downloadsize is not None):
4469 downloadsize = int(downloadsize);
4470 if downloadsize is None: downloadsize = 0;
4471 fulldatasize = 0;
4472 prevdownsize = 0;
4473 log.info("Downloading URL "+httpurl);
4474 with BytesIO() as strbuf:
4475 while True:
4476 databytes = geturls_text.read(buffersize);
4477 if not databytes: break;
4478 datasize = len(databytes);
4479 fulldatasize = datasize + fulldatasize;
4480 percentage = "";
4481 if(downloadsize>0):
4482 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4483 downloaddiff = fulldatasize - prevdownsize;
4484 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4485 prevdownsize = fulldatasize;
4486 strbuf.write(databytes);
4487 strbuf.seek(0);
4488 returnval_content = strbuf.read();
4489 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None, 'Reason': None};
4490 geturls_text.close();
4491 return returnval;
4493 if(not haveparamiko):
4494 def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
4495 return False;
4497 if(haveparamiko):
4498 def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4499 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
4500 exec_time_start = time.time();
4501 myhash = hashlib.new("sha1");
4502 if(sys.version[0]=="2"):
4503 myhash.update(httpurl);
4504 myhash.update(str(buffersize));
4505 myhash.update(str(exec_time_start));
4506 if(sys.version[0]>="3"):
4507 myhash.update(httpurl.encode('utf-8'));
4508 myhash.update(str(buffersize).encode('utf-8'));
4509 myhash.update(str(exec_time_start).encode('utf-8'));
4510 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
4511 if(sleep<0):
4512 sleep = geturls_download_sleep;
4513 if(timeout<=0):
4514 timeout = 10;
4515 pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
4516 if(not pretmpfilename):
4517 return False;
4518 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
4519 tmpfilename = f.name;
4520 try:
4521 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
4522 except AttributeError:
4523 try:
4524 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4525 except ValueError:
4526 pass;
4527 except ValueError:
4528 pass;
4529 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
4530 f.write(pretmpfilename['Content']);
4531 f.close();
4532 exec_time_end = time.time();
4533 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
4534 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
4535 return returnval;
4537 if(not haveparamiko):
4538 def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4539 return False;
4541 if(haveparamiko):
4542 def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4543 global geturls_download_sleep;
4544 if(sleep<0):
4545 sleep = geturls_download_sleep;
4546 if(timeout<=0):
4547 timeout = 10;
4548 if(not outfile=="-"):
4549 outpath = outpath.rstrip(os.path.sep);
4550 filepath = os.path.realpath(outpath+os.path.sep+outfile);
4551 if(not os.path.exists(outpath)):
4552 os.makedirs(outpath);
4553 if(os.path.exists(outpath) and os.path.isfile(outpath)):
4554 return False;
4555 if(os.path.exists(filepath) and os.path.isdir(filepath)):
4556 return False;
4557 pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4558 if(not pretmpfilename):
4559 return False;
4560 tmpfilename = pretmpfilename['Filename'];
4561 downloadsize = int(os.path.getsize(tmpfilename));
4562 fulldatasize = 0;
4563 log.info("Moving file "+tmpfilename+" to "+filepath);
4564 exec_time_start = time.time();
4565 shutil.move(tmpfilename, filepath);
4566 exec_time_end = time.time();
4567 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
4568 if(os.path.exists(tmpfilename)):
4569 os.remove(tmpfilename);
4570 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4571 if(outfile=="-"):
4572 pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4573 tmpfilename = pretmpfilename['Filename'];
4574 downloadsize = int(os.path.getsize(tmpfilename));
4575 fulldatasize = 0;
4576 prevdownsize = 0;
4577 exec_time_start = time.time();
4578 with open(tmpfilename, 'rb') as ft:
4579 f = BytesIO();
4580 while True:
4581 databytes = ft.read(buffersize[1]);
4582 if not databytes: break;
4583 datasize = len(databytes);
4584 fulldatasize = datasize + fulldatasize;
4585 percentage = "";
4586 if(downloadsize>0):
4587 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4588 downloaddiff = fulldatasize - prevdownsize;
4589 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4590 prevdownsize = fulldatasize;
4591 f.write(databytes);
4592 f.seek(0);
4593 fdata = f.getvalue();
4594 f.close();
4595 ft.close();
4596 os.remove(tmpfilename);
4597 exec_time_end = time.time();
4598 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
4599 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4600 return returnval;
4602 if(not haveparamiko):
4603 def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4604 return False;
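# Illustrative usage sketch: mirror an SFTP file to disk through the to-file
# wrapper (placeholder URL and paths; returns False without paramiko).
def _example_download_sftp_to_file():
    return download_from_url_to_file_with_sftp("sftp://user:password@sftp.example.com/home/user/file.txt", outfile="file.txt", outpath=os.getcwd());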
4606 if(haveparamiko):
4607 def upload_file_to_sftp_file(sftpfile, url):
4608 urlparts = urlparse.urlparse(url);
4609 file_name = os.path.basename(urlparts.path);
4610 file_dir = os.path.dirname(urlparts.path);
4611 sftp_port = urlparts.port;
4612 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
4613 return False;
4614 if(urlparts.port is None):
4615 sftp_port = 22;
4616 else:
4617 sftp_port = urlparts.port;
4618 if(urlparts.username is not None):
4619 sftp_username = urlparts.username;
4620 else:
4621 sftp_username = "anonymous";
4622 if(urlparts.password is not None):
4623 sftp_password = urlparts.password;
4624 elif(urlparts.password is None and urlparts.username=="anonymous"):
4625 sftp_password = "anonymous";
4626 else:
4627 sftp_password = "";
4628 if(urlparts.scheme!="sftp"):
4629 return False;
4630 ssh = paramiko.SSHClient();
4631 ssh.load_system_host_keys();
4632 ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
4633 try:
4634 ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
4635 except paramiko.ssh_exception.SSHException:
4636 return False;
4637 except socket.gaierror:
4638 log.info("Error With URL "+url);
4639 return False;
4640 except socket.timeout:
4641 log.info("Error With URL "+url);
4642 return False;
4643 sftp = ssh.open_sftp();
4644 sftp.putfo(sftpfile, urlparts.path);
4645 sftp.close();
4646 ssh.close();
4647 sftpfile.seek(0, 0);
4648 return sftpfile;
4649 else:
4650 def upload_file_to_sftp_file(sftpfile, url):
4651 return False;
4653 if(haveparamiko):
4654 def upload_file_to_sftp_string(sftpstring, url):
4655 sftpfileo = BytesIO(sftpstring);
4656 sftpfile = upload_file_to_sftp_file(sftpfileo, url);
4657 sftpfileo.close();
4658 return sftpfile;
4659 else:
4660 def upload_file_to_sftp_string(sftpstring, url):
4661 return False;
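# Illustrative usage sketch (placeholder credentials): push bytes to an SFTP
# server via the string helper defined above.
def _example_upload_sftp_bytes():
    return upload_file_to_sftp_string(b"hello world", "sftp://user:password@sftp.example.com/home/user/hello.txt");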
4664 if(havepysftp):
4665 def download_file_from_pysftp_file(url):
4666 urlparts = urlparse.urlparse(url);
4667 file_name = os.path.basename(urlparts.path);
4668 file_dir = os.path.dirname(urlparts.path);
4669 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
4670 return False;
4671 sftp_port = urlparts.port;
4672 if(urlparts.port is None):
4673 sftp_port = 22;
4674 else:
4675 sftp_port = urlparts.port;
4676 if(urlparts.username is not None):
4677 sftp_username = urlparts.username;
4678 else:
4679 sftp_username = "anonymous";
4680 if(urlparts.password is not None):
4681 sftp_password = urlparts.password;
4682 elif(urlparts.password is None and urlparts.username=="anonymous"):
4683 sftp_password = "anonymous";
4684 else:
4685 sftp_password = "";
4686 if(urlparts.scheme!="sftp"):
4687 return False;
4688 try:
4689 sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
4690 except paramiko.ssh_exception.SSHException:
4691 return False;
4692 except socket.gaierror:
4693 log.info("Error With URL "+url);
4694 return False;
4695 except socket.timeout:
4696 log.info("Error With URL "+url);
4697 return False;
4698 sftpfile = BytesIO();
4699 sftp.getfo(urlparts.path, sftpfile);
4700 sftp.close();
4701 sftpfile.seek(0, 0);
4702 return sftpfile;
4705 else:
4706 def download_file_from_pysftp_file(url):
4707 return False;
4709 if(havepysftp):
4710 def download_file_from_pysftp_string(url):
4711 sftpfile = download_file_from_pysftp_file(url);
4712 return sftpfile.read() if sftpfile else False;
4713 else:
4714 def download_file_from_pysftp_string(url):
4715 return False;
4717 if(havepysftp):
4718 def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
4719 global geturls_download_sleep, havebrotli;
4720 if(sleep<0):
4721 sleep = geturls_download_sleep;
4722 if(timeout<=0):
4723 timeout = 10;
4724 urlparts = urlparse.urlparse(httpurl);
4725 if(isinstance(httpheaders, list)):
4726 httpheaders = make_http_headers_from_list_to_dict(httpheaders);
4727 httpheaders = fix_header_names(httpheaders);
4728 if(isinstance(httpheaders, dict)):
4729 httpheaders = make_http_headers_from_dict_to_list(httpheaders);
4730 time.sleep(sleep);
4731 geturls_text = download_file_from_pysftp_file(httpurl);
4732 if(not geturls_text):
4733 return False;
4734 downloadsize = None;
4735 if(downloadsize is not None):
4736 downloadsize = int(downloadsize);
4737 if downloadsize is None: downloadsize = 0;
4738 fulldatasize = 0;
4739 prevdownsize = 0;
4740 log.info("Downloading URL "+httpurl);
4741 with BytesIO() as strbuf:
4742 while True:
4743 databytes = geturls_text.read(buffersize);
4744 if not databytes: break;
4745 datasize = len(databytes);
4746 fulldatasize = datasize + fulldatasize;
4747 percentage = "";
4748 if(downloadsize>0):
4749 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4750 downloaddiff = fulldatasize - prevdownsize;
4751 log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4752 prevdownsize = fulldatasize;
4753 strbuf.write(databytes);
4754 strbuf.seek(0);
4755 returnval_content = strbuf.read();
4756 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None, 'Reason': None};
4757 geturls_text.close();
4758 return returnval;
4760 if(not havepysftp):
4761 def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
4762 return False;
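# Illustrative usage sketch: the pysftp path mirrors the paramiko one but
# takes no user-agent/referer arguments. Placeholder URL; False without pysftp.
def _example_fetch_content_with_pysftp():
    result = download_from_url_with_pysftp("sftp://user:password@sftp.example.com/home/user/file.txt");
    if(not result):
        return False;
    return result['Content'];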
4764 if(havepysftp):
4765 def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4766 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
4767 exec_time_start = time.time();
4768 myhash = hashlib.new("sha1");
4769 if(sys.version[0]=="2"):
4770 myhash.update(httpurl);
4771 myhash.update(str(buffersize));
4772 myhash.update(str(exec_time_start));
4773 if(sys.version[0]>="3"):
4774 myhash.update(httpurl.encode('utf-8'));
4775 myhash.update(str(buffersize).encode('utf-8'));
4776 myhash.update(str(exec_time_start).encode('utf-8'));
4777 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
4778 if(sleep<0):
4779 sleep = geturls_download_sleep;
4780 if(timeout<=0):
4781 timeout = 10;
4782 pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
4783 if(not pretmpfilename):
4784 return False;
4785 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
4786 tmpfilename = f.name;
4787 try:
4788 os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
4789 except AttributeError:
4790 try:
4791 os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4792 except ValueError:
4793 pass;
4794 except ValueError:
4795 pass;
4796 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
4797 f.write(pretmpfilename['Content']);
4798 f.close();
4799 exec_time_end = time.time();
4800 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
4801 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
4802 return returnval;
4804 if(not havepysftp):
4805 def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
4806 return False;
4808 if(havepysftp):
4809 def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4810 global geturls_download_sleep;
4811 if(sleep<0):
4812 sleep = geturls_download_sleep;
4813 if(timeout<=0):
4814 timeout = 10;
4815 if(not outfile=="-"):
4816 outpath = outpath.rstrip(os.path.sep);
4817 filepath = os.path.realpath(outpath+os.path.sep+outfile);
4818 if(not os.path.exists(outpath)):
4819 os.makedirs(outpath);
4820 if(os.path.exists(outpath) and os.path.isfile(outpath)):
4821 return False;
4822 if(os.path.exists(filepath) and os.path.isdir(filepath)):
4823 return False;
4824 pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4825 if(not pretmpfilename):
4826 return False;
4827 tmpfilename = pretmpfilename['Filename'];
4828 downloadsize = int(os.path.getsize(tmpfilename));
4829 fulldatasize = 0;
4830 log.info("Moving file "+tmpfilename+" to "+filepath);
4831 exec_time_start = time.time();
4832 shutil.move(tmpfilename, filepath);
4833 exec_time_end = time.time();
4834 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
4835 if(os.path.exists(tmpfilename)):
4836 os.remove(tmpfilename);
4837 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4838 if(outfile=="-"):
4839 pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
4840 tmpfilename = pretmpfilename['Filename'];
4841 downloadsize = int(os.path.getsize(tmpfilename));
4842 fulldatasize = 0;
4843 prevdownsize = 0;
4844 exec_time_start = time.time();
4845 with open(tmpfilename, 'rb') as ft:
4846 f = BytesIO();
4847 while True:
4848 databytes = ft.read(buffersize[1]);
4849 if not databytes: break;
4850 datasize = len(databytes);
4851 fulldatasize = datasize + fulldatasize;
4852 percentage = "";
4853 if(downloadsize>0):
4854 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
4855 downloaddiff = fulldatasize - prevdownsize;
4856 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
4857 prevdownsize = fulldatasize;
4858 f.write(databytes);
4859 f.seek(0);
4860 fdata = f.getvalue();
4861 f.close();
4862 ft.close();
4863 os.remove(tmpfilename);
4864 exec_time_end = time.time();
4865 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
4866 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
4867 return returnval;
4869 if(not havepysftp):
4870 def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
4871 return False;
4873 if(havepysftp):
4874 def upload_file_to_pysftp_file(sftpfile, url):
4875 urlparts = urlparse.urlparse(url);
4876 file_name = os.path.basename(urlparts.path);
4877 file_dir = os.path.dirname(urlparts.path);
4878 sftp_port = urlparts.port;
4879 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
4880 return False;
4881 if(urlparts.port is None):
4882 sftp_port = 22;
4883 else:
4884 sftp_port = urlparts.port;
4885 if(urlparts.username is not None):
4886 sftp_username = urlparts.username;
4887 else:
4888 sftp_username = "anonymous";
4889 if(urlparts.password is not None):
4890 sftp_password = urlparts.password;
4891 elif(urlparts.password is None and urlparts.username=="anonymous"):
4892 sftp_password = "anonymous";
4893 else:
4894 sftp_password = "";
4895 if(urlparts.scheme!="sftp"):
4896 return False;
4897 try:
4898 sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
4899 except paramiko.ssh_exception.SSHException:
4900 return False;
4901 except socket.gaierror:
4902 log.info("Error With URL "+url);
4903 return False;
4904 except socket.timeout:
4905 log.info("Error With URL "+url);
4906 return False;
4907 sftp.putfo(sftpfile, urlparts.path);
4908 sftp.close();
4909 sftpfile.seek(0, 0);
4910 return sftpfile;
4913 else:
4914 def upload_file_to_pysftp_file(sftpfile, url):
4915 return False;
4917 if(havepysftp):
4918 def upload_file_to_pysftp_string(sftpstring, url):
4919 sftpfileo = BytesIO(sftpstring);
4920 sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
4921 sftpfileo.close();
4922 return sftpfile;
4923 else:
4924 def upload_file_to_pysftp_string(sftpstring, url):
4925 return False;
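# Combined usage sketch (illustrative): pick whichever SFTP backend imported
# successfully at module load, preferring paramiko over pysftp.
def _example_fetch_sftp_any(url):
    if(haveparamiko):
        return download_file_from_sftp_string(url);
    if(havepysftp):
        return download_file_from_pysftp_string(url);
    return False;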