# Update pywwwget.py
# [PyWWW-Get.git] / pywwwgetold.py (gitweb extraction header)
# blob fecccb0c35bb3c96c43295984d245acf0e07d534
1 #!/usr/bin/env python
3 '''
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
16 '''
18 from __future__ import division, absolute_import, print_function;
19 import re, os, sys, hashlib, shutil, platform, tempfile, urllib, zlib, time, argparse, subprocess, socket, email.utils, datetime, time;
20 import logging as log;
21 from ftplib import FTP, FTP_TLS;
22 from base64 import b64encode;
23 try:
24 from cgi import parse_qsl;
25 except ImportError:
26 from urlparse import parse_qsl;
27 haverequests = False;
28 try:
29 import requests;
30 haverequests = True;
31 except ImportError:
32 haverequests = False;
33 havemechanize = False;
34 try:
35 import mechanize;
36 havemechanize = True;
37 except ImportError:
38 havemechanize = False;
39 havepycurl = False;
40 try:
41 import pycurl;
42 havepycurl = True;
43 except ImportError:
44 havepycurl = False;
45 haveparamiko = False;
46 try:
47 import paramiko;
48 haveparamiko = True;
49 except ImportError:
50 haveparamiko = False;
51 havepysftp = False;
52 try:
53 import pysftp;
54 havepysftp = True;
55 except ImportError:
56 havepysftp = False;
57 haveurllib3 = False;
58 try:
59 import urllib3;
60 haveurllib3 = True;
61 except ImportError:
62 haveurllib3 = False;
63 havehttplib2 = False;
64 try:
65 import httplib2;
66 from httplib2 import HTTPConnectionWithTimeout, HTTPSConnectionWithTimeout;
67 havehttplib2 = True;
68 except ImportError:
69 havehttplib2 = False;
70 havehttpx = False;
71 try:
72 import httpx;
73 havehttpx = True;
74 except ImportError:
75 havehttpx = False;
76 havehttpcore = False;
77 try:
78 import httpcore;
79 havehttpcore = True;
80 except ImportError:
81 havehttpcore = False;
82 haveaiohttp = False;
83 try:
84 import aiohttp;
85 haveaiohttp = True;
86 except ImportError:
87 haveaiohttp = False;
88 havebrotli = False;
89 try:
90 import brotli;
91 havebrotli = True;
92 except ImportError:
93 havebrotli = False;
94 havezstd = False;
95 try:
96 import zstandard;
97 havezstd = True;
98 except ImportError:
99 havezstd = False;
100 havelzma = False;
101 try:
102 import lzma;
103 havelzma = True;
104 except ImportError:
105 havelzma = False;
106 if(sys.version[0]=="2"):
107 try:
108 from io import StringIO, BytesIO;
109 except ImportError:
110 try:
111 from cStringIO import StringIO;
112 from cStringIO import StringIO as BytesIO;
113 except ImportError:
114 from StringIO import StringIO;
115 from StringIO import StringIO as BytesIO;
116 # From http://python-future.org/compatible_idioms.html
117 from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
118 from urllib import urlencode;
119 from urllib import urlopen as urlopenalt;
120 from urllib2 import urlopen, Request, install_opener, HTTPError, URLError, build_opener, HTTPCookieProcessor;
121 import urlparse, cookielib;
122 from httplib import HTTPConnection, HTTPSConnection;
123 if(sys.version[0]>="3"):
124 from io import StringIO, BytesIO;
125 # From http://python-future.org/compatible_idioms.html
126 from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
127 from urllib.request import urlopen, Request, install_opener, build_opener, HTTPCookieProcessor;
128 from urllib.error import HTTPError, URLError;
129 import urllib.parse as urlparse;
130 import http.cookiejar as cookielib;
131 from http.client import HTTPConnection, HTTPSConnection;
133 __program_name__ = "PyWWW-Get";
134 __program_alt_name__ = "PyWWWGet";
135 __program_small_name__ = "wwwget";
136 __project__ = __program_name__;
137 __project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
138 __version_info__ = (2, 0, 2, "RC 1", 1);
139 __version_date_info__ = (2023, 10, 5, "RC 1", 1);
140 __version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
141 __revision__ = __version_info__[3];
142 __revision_id__ = "$Id$";
143 if(__version_info__[4] is not None):
144 __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
145 if(__version_info__[4] is None):
146 __version_date_plusrc__ = __version_date__;
147 if(__version_info__[3] is not None):
148 __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
149 if(__version_info__[3] is None):
150 __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
152 tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
153 tmpfilesuffix = "-";
154 pytempdir = tempfile.gettempdir();
156 PyBitness = platform.architecture();
157 if(PyBitness=="32bit" or PyBitness=="32"):
158 PyBitness = "32";
159 elif(PyBitness=="64bit" or PyBitness=="64"):
160 PyBitness = "64";
161 else:
162 PyBitness = "32";
# Advertise only the content-encodings this interpreter can actually decode.
compression_supported_list = ['identity', 'gzip', 'deflate', 'bzip2'];
for _codec_available, _codec_names in ((havebrotli, ['br']), (havezstd, ['zstd']), (havelzma, ['lzma', 'xz'])):
    if(_codec_available):
        compression_supported_list.extend(_codec_names);
# Ready-made Accept-Encoding header value.
compression_supported = ', '.join(compression_supported_list);
# Cookie jar shared by the urllib-based request paths.
geturls_cj = cookielib.CookieJar();
# OS tokens for User-Agent strings plus matching SEC-CH-UA-* client-hint
# headers for each Windows release.
# BUG FIX: every *_ua_addon dict spelled 'SEC-CH-UA-PLATFORM' twice, so the
# platform-version value silently overwrote "Windows" (duplicate dict keys:
# last one wins).  The second entry is now the intended
# 'SEC-CH-UA-PLATFORM-VERSION' key.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# BUG FIX: the XP x64 entry carried version "5.1.0"; NT 5.2 is correct here.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Canned User-Agent strings impersonating common Windows 7 desktop browsers;
# each embeds the shared windows7_ua_string OS token defined above.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Honest self-identifying User-Agent for this program.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Name of the running Python implementation (e.g. "CPython", "PyPy");
# falls back to the generic "Python" if platform reports an empty string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
# Alternative self-identifying UA that also embeds OS, machine architecture
# and interpreter version details.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot User-Agent strings (current and legacy forms).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used when the caller does not pick one.
geturls_ua = geturls_ua_firefox_windows7;
# Complete default request-header sets matching each User-Agent above.
# Chromium-family sets additionally carry SEC-CH-UA client-hint headers and
# are merged with the windows7_ua_addon hint dict defined above.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Default header sets for the program's own UA strings and for Googlebot.
# BUG FIX: both pywwwget dicts listed 'SEC-CH-UA-PLATFORM' twice (duplicate
# dict keys: last one wins), so the str(__version__) value overwrote the
# implementation name.  The second occurrence is now the intended
# 'SEC-CH-UA-PLATFORM-VERSION' key; its str(__version__) value is kept as-is.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Defaults used by download helpers when the caller does not override them.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit a debug/status message through print() or the logging module.

    dbgtxt    -- the message text.
    outtype   -- one of "print", "log", "warning", "error", "critical",
                 "exception", "logalt" (uses dgblevel) or "debug".
    dbgenable -- when False the message is suppressed (still returns True).
    dgblevel  -- numeric logging level used only by outtype "logalt".

    Returns True when the message was emitted or suppressed, False for an
    unrecognized outtype.
    """
    # Suppressed output still counts as "handled", matching the original
    # behavior where a disabled message returned True for any outtype.
    if(not dbgenable):
        return True;
    # BUG FIX: this module imports "logging as log" (top of file), so the
    # original logging.info()/... calls raised NameError; use the alias.
    if(outtype=="print"):
        print(dbgtxt);
        return True;
    elif(outtype=="log"):
        log.info(dbgtxt);
        return True;
    elif(outtype=="warning"):
        log.warning(dbgtxt);
        return True;
    elif(outtype=="error"):
        log.error(dbgtxt);
        return True;
    elif(outtype=="critical"):
        log.critical(dbgtxt);
        return True;
    elif(outtype=="exception"):
        log.exception(dbgtxt);
        return True;
    elif(outtype=="logalt"):
        log.log(dgblevel, dbgtxt);
        return True;
    elif(outtype=="debug"):
        log.debug(dbgtxt);
        return True;
    else:
        return False;
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Like verbose_printout(), but hand the text back on success.

    Returns dbgtxt when the message was handled, False otherwise.
    """
    return dbgtxt if verbose_printout(dbgtxt, outtype, dbgenable, dgblevel) else False;
def add_url_param(url, **params):
    """Return *url* with **params merged into its query string.

    Existing query keys are kept (parse_qsl flattens repeated keys to the
    last value) and any key given in params overrides its old value.
    """
    query_index = 3;  # position of the query component in urlsplit() results
    pieces = list(urlparse.urlsplit(url));
    merged = dict(parse_qsl(pieces[query_index]));  # use cgi.parse_qs for list values
    merged.update(params);
    pieces[query_index] = urlencode(merged);
    return urlparse.urlunsplit(pieces);
# Make the script's own directory and the CWD searchable alongside PATH.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile*; return its full path or None.

    BUG FIX: the original split PATH on ":" and joined with "/", which is
    wrong on Windows (";" separator, "\\" joins); os.pathsep/os.path.join
    are portable.  The not-found case now returns None explicitly.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    return None;
def listize(varlist):
    """Index a sequence both ways.

    Returns {1/'reg': {1-based index: item}, 2/'rev': {item: 1-based index}}.
    """
    forward = {};
    backward = {};
    for position, item in enumerate(varlist, 1):
        forward[position] = item;
        backward[item] = position;
    return {1: forward, 2: backward, 'reg': forward, 'rev': backward};
def twolistize(varlist):
    """Like listize(), but for (name, description) pairs.

    Both fields are stripped; returns {1/'name': name maps, 2/'desc': desc
    maps}, each in the same forward/reverse shape listize() produces.
    """
    namereg = {};
    namerev = {};
    descreg = {};
    descrev = {};
    for position, pair in enumerate(varlist, 1):
        name = pair[0].strip();
        desc = pair[1].strip();
        namereg[position] = name;
        namerev[name] = position;
        descreg[position] = desc;
        descrev[desc] = position;
    nametmp = {1: namereg, 2: namerev, 'reg': namereg, 'rev': namerev};
    desctmp = {1: descreg, 2: descrev, 'reg': descreg, 'rev': descrev};
    return {1: nametmp, 2: desctmp, 'name': nametmp, 'desc': desctmp};
def arglistize(proexec, *varlist):
    """Flatten (option, value) pairs into an argv list headed by *proexec*.

    None entries in either position of a pair are skipped.
    """
    newarglist = [proexec];
    for pair in varlist:
        for part in (pair[0], pair[1]):
            if part is not None:
                newarglist.append(part);
    return newarglist;
def fix_header_names(header_dict):
    """Return a copy of *header_dict* with keys normalized to Title-Case."""
    if(sys.version[0]=="2"):
        return {k.title(): v for k, v in header_dict.iteritems()};
    return {k.title(): v for k, v in header_dict.items()};
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a duration in seconds as H:MM:SS.ss (hours not zero-padded)."""
    whole_hours, remainder = divmod(sec_elapsed, 60 * 60);
    whole_minutes = int(remainder / 60);
    leftover_seconds = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(int(whole_hours), whole_minutes, leftover_seconds);
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Describe a byte count in IEC (KiB) or SI (kB) units.

    Returns a dict with 'Bytes' (the original count), 'ReadableWithSuffix',
    'ReadableWithoutSuffix' and 'ReadableSuffix' keys.  Unknown unit names
    fall back to "IEC".
    """
    unit = unit.upper();
    if(unit!="IEC" and unit!="SI"):
        unit = "IEC";
    if(unit=="IEC"):
        units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
        unitswos = ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
        unitsize = 1024.0;
    if(unit=="SI"):
        units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
        unitswos = ["B","kB","MB","GB","TB","PB","EB","ZB"];
        unitsize = 1000.0;
    orgbytes = bytes;
    # Divide down until the value fits under one multiple of the next unit.
    for suffix in units:
        if abs(bytes) < unitsize:
            formatted = ("%3."+str(precision)+"f%s") % (bytes, suffix);
            # Trim a trailing ".0" run so e.g. "1.0 KiB" renders as "1 KiB".
            formatted = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", formatted);
            formatted = re.sub(r"\. ([A-Za-z]+)", r" \1", formatted);
            pieces = formatted.split();
            return {'Bytes': orgbytes, 'ReadableWithSuffix': formatted, 'ReadableWithoutSuffix': pieces[0], 'ReadableSuffix': pieces[1]};
        bytes /= unitsize;
    # Larger than the biggest listed unit: report in YiB.
    formatted = ("%."+str(precision)+"f%s") % (bytes, "YiB");
    formatted = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", formatted);
    formatted = re.sub(r"\. ([A-Za-z]+)", r" \1", formatted);
    pieces = formatted.split();
    return {'Bytes': orgbytes, 'ReadableWithSuffix': formatted, 'ReadableWithoutSuffix': pieces[0], 'ReadableSuffix': pieces[1]};
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Readable-size dict for a file on disk, optionally with content hashes.

    infile       -- path to the file to measure.
    precision    -- decimal places for the readable value.
    unit         -- "IEC" or "SI" (see get_readable_size()).
    usehashes    -- when True, add one hexdigest entry per hash type.
    usehashtypes -- comma-separated hashlib algorithm names; each result is
                    stored under the upper-cased algorithm name.
    """
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes):
        # FIX: read via a context manager so the handle is closed even if
        # read() raises (the original leaked the handle on error).
        # NOTE: reads the whole file into memory, as the original did.
        with open(infile, "rb") as openfile:
            filecontents = openfile.read();
        for hashtypename in usehashtypes.split(","):
            hashtypeup = hashtypename.strip().upper();
            filehash = hashlib.new(hashtypeup);
            filehash.update(filecontents);
            return_val.update({hashtypeup: filehash.hexdigest()});
    return return_val;
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Readable-size dict for a string's length, optionally with hashes.

    Mirrors get_readable_size_from_file(), but measures len(instring) and
    hashes the string itself (UTF-8 encoded on Python 3).
    """
    unit = unit.upper();
    usehashtypes = usehashtypes.lower();
    return_val = get_readable_size(len(instring), precision, unit);
    if(usehashes):
        for hashtypename in usehashtypes.split(","):
            hashtypeup = hashtypename.strip().upper();
            strhash = hashlib.new(hashtypeup);
            if(sys.version[0]=="2"):
                strhash.update(instring);
            if(sys.version[0]>="3"):
                strhash.update(instring.encode('utf-8'));
            return_val.update({hashtypeup: strhash.hexdigest()});
    return return_val;
def http_status_to_reason(code):
    """Map a numeric HTTP status code to its standard reason phrase.

    Unknown codes yield 'Unknown Status Code'.
    """
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    };
    try:
        return reasons[code];
    except KeyError:
        return 'Unknown Status Code';
def ftp_status_to_reason(code):
    """Map a numeric FTP reply code to its standard reply text.

    Unknown codes yield 'Unknown Status Code'.
    """
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    };
    try:
        return reasons[code];
    except KeyError:
        return 'Unknown Status Code';
def sftp_status_to_reason(code):
    """Map an SFTP (SSH_FXP) status code to its SSH_FX_* symbolic name.

    Unknown codes yield 'Unknown Status Code'.
    """
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    };
    try:
        return reasons[code];
    except KeyError:
        return 'Unknown Status Code';
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of (name, value) tuples.

    Lists pass through unchanged; any other type yields False.
    """
    if isinstance(headers, dict):
        if(sys.version[0]=="2"):
            return list(headers.iteritems());
        return list(headers.items());
    if isinstance(headers, list):
        return headers;
    return False;
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to pycurl's "Name: value" string list.

    Lists pass through unchanged; any other type yields False.
    """
    if isinstance(headers, dict):
        if(sys.version[0]=="2"):
            return [headkey+": "+headvalue for headkey, headvalue in headers.iteritems()];
        return [headkey+": "+headvalue for headkey, headvalue in headers.items()];
    if isinstance(headers, list):
        return headers;
    return False;
def make_http_headers_from_pycurl_to_dict(headers):
    """Parse a raw pycurl response-header blob into a Title-Cased dict.

    Lines without a ": " separator (e.g. the status line) are skipped.
    """
    header_dict = {};
    for raw_line in headers.strip().split('\r\n'):
        name, sep, value = raw_line.partition(': ');
        if sep:
            header_dict[name.title()] = value;
    return header_dict;
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) tuples to a header dict.

    Dicts pass through unchanged; any other type yields False.
    """
    if isinstance(headers, list):
        returnval = {};
        for pair in headers:
            returnval[pair[0]] = pair[1];
        return returnval;
    if isinstance(headers, dict):
        return headers;
    return False;
def get_httplib_support(checkvalue=None):
    """List the usable download backends, or test a single backend name.

    With checkvalue=None, returns the ordered list of backend identifiers
    that are available in this environment.  Otherwise returns True/False
    for that one backend ("urllib1"/"urllib2" and "httplib1" are treated as
    aliases of "urllib"/"httplib").
    """
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    # "ftp", "httplib", "urllib" and "request" need nothing beyond stdlib.
    supported = ["ftp", "httplib"];
    if(havehttplib2):
        supported.append("httplib2");
    supported.append("urllib");
    if(haveurllib3):
        supported.append("urllib3");
        supported.append("request3");
    supported.append("request");
    if(haverequests):
        supported.append("requests");
    if(haveaiohttp):
        supported.append("aiohttp");
    if(havehttpx):
        supported.append("httpx");
        supported.append("httpx2");
    if(havemechanize):
        supported.append("mechanize");
    if(havepycurl):
        supported.append("pycurl");
        # HTTP/2 and HTTP/3 variants depend on the libcurl build.
        if(hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
            supported.append("pycurl2");
        if(hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
            supported.append("pycurl3");
    if(haveparamiko):
        supported.append("sftp");
    if(havepysftp):
        supported.append("pysftp");
    if(checkvalue is None):
        return supported;
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    if(checkvalue=="httplib1"):
        checkvalue = "httplib";
    return checkvalue in supported;
def check_httplib_support(checkvalue="urllib"):
    """Return True when the named backend is available.

    "urllib1"/"urllib2" and "httplib1" are accepted as aliases.
    """
    aliases = {"urllib1": "urllib", "urllib2": "urllib", "httplib1": "httplib"};
    return get_httplib_support(aliases.get(checkvalue, checkvalue));
def get_httplib_support_list():
    """Convenience wrapper: the full list of supported backend names."""
    return get_httplib_support(None);
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
 """Download httpurl into memory using the backend named by httplibuse.

 Legacy backend aliases are normalized, and a backend whose module is
 not importable silently falls back (usually to "urllib"; the SSH-based
 backends fall back to "ftp").  Returns the result dict produced by the
 chosen download_from_url_with_* helper, or False for an unknown backend.
 """
 global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 # Normalize legacy aliases first; "request" is an alias for "urllib".
 if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
  httplibuse = "urllib";
 if(httplibuse=="httplib1"):
  httplibuse = "httplib";
 # Fall back when the requested backend's module is not importable.
 if(not haverequests and httplibuse=="requests"):
  httplibuse = "urllib";
 if(not haveaiohttp and httplibuse=="aiohttp"):
  httplibuse = "urllib";
 if(not havehttpx and (httplibuse=="httpx" or httplibuse=="httpx2")):
  httplibuse = "urllib";
 if(not havehttpcore and (httplibuse=="httpcore" or httplibuse=="httpcore2")):
  httplibuse = "urllib";
 if(not havemechanize and httplibuse=="mechanize"):
  httplibuse = "urllib";
 if(not havepycurl and (httplibuse=="pycurl" or httplibuse=="pycurl2" or httplibuse=="pycurl3")):
  httplibuse = "urllib";
 # Downgrade pycurl HTTP/3 and HTTP/2 requests to what libcurl supports.
 if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
  httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl";
 if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
  httplibuse = "pycurl";
 if(not havehttplib2 and httplibuse=="httplib2"):
  httplibuse = "httplib";
 if(not haveparamiko and httplibuse=="sftp"):
  httplibuse = "ftp";
 if(not havepysftp and httplibuse=="pysftp"):
  httplibuse = "ftp";
 # Dispatch; the old unreachable 'elif httplibuse=="request"' branch and the
 # duplicate '"request3"' test on the urllib3 branch have been removed
 # (both values are consumed earlier in the chain).
 if(httplibuse=="urllib"):
  returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="request3"):
  returnval = download_from_url_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="httplib"):
  returnval = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="httplib2"):
  returnval = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="urllib3"):
  returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="requests"):
  returnval = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="aiohttp"):
  returnval = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="httpx"):
  returnval = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="httpx2"):
  returnval = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="httpcore"):
  returnval = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="httpcore2"):
  returnval = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="mechanize"):
  returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl"):
  returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl2"):
  returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl3"):
  returnval = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="ftp"):
  returnval = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="sftp"):
  returnval = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 elif(httplibuse=="pysftp"):
  returnval = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 else:
  returnval = False;
 return returnval;
def download_from_url_from_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
 """Run download_from_url for each URL and return the list of results.

 httpurl may be a single URL, a list/tuple of URLs, or a dict whose
 values are URLs.
 """
 if(isinstance(httpurl, dict)):
  # BUGFIX: dict.values() is a non-indexable view on Python 3; the old
  # code indexed it directly and crashed. Materialize it as a list.
  httpurl = list(httpurl.values());
 elif(not isinstance(httpurl, (list, tuple))):
  httpurl = [httpurl];
 returnval = [];
 for thisurl in httpurl:
  returnval.append(download_from_url(thisurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize, sleep, timeout));
 return returnval;
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
 """Download httpurl to a temporary file using the backend named by
 httplibuse and return the result dict of the chosen
 download_from_url_file_with_* helper (False for an unknown backend).

 Legacy aliases are normalized; unavailable backends silently fall back
 (usually to "urllib", or "ftp" for the SSH-based backends).
 """
 global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 # Normalize legacy aliases first; "request" is an alias for "urllib".
 if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
  httplibuse = "urllib";
 if(httplibuse=="httplib1"):
  httplibuse = "httplib";
 # Fall back when the requested backend's module is not importable.
 if(not haverequests and httplibuse=="requests"):
  httplibuse = "urllib";
 if(not haveaiohttp and httplibuse=="aiohttp"):
  httplibuse = "urllib";
 if(not havehttpx and (httplibuse=="httpx" or httplibuse=="httpx2")):
  httplibuse = "urllib";
 if(not havehttpcore and (httplibuse=="httpcore" or httplibuse=="httpcore2")):
  httplibuse = "urllib";
 if(not havemechanize and httplibuse=="mechanize"):
  httplibuse = "urllib";
 if(not havepycurl and (httplibuse=="pycurl" or httplibuse=="pycurl2" or httplibuse=="pycurl3")):
  httplibuse = "urllib";
 # Downgrade pycurl HTTP/3 and HTTP/2 requests to what libcurl supports.
 if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
  httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl";
 if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
  httplibuse = "pycurl";
 if(not havehttplib2 and httplibuse=="httplib2"):
  httplibuse = "httplib";
 if(not haveparamiko and httplibuse=="sftp"):
  httplibuse = "ftp";
 # BUGFIX: the "pysftp" fallback previously tested haveparamiko instead
 # of havepysftp, so a missing pysftp module was not detected.
 if(not havepysftp and httplibuse=="pysftp"):
  httplibuse = "ftp";
 # Dispatch; the old unreachable 'elif httplibuse=="request"' branch and the
 # duplicate '"request3"' test on the urllib3 branch have been removed.
 if(httplibuse=="urllib"):
  returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="request3"):
  returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httplib"):
  returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httplib2"):
  returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="urllib3"):
  returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="requests"):
  returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="aiohttp"):
  returnval = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpx"):
  returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpx2"):
  returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpcore"):
  returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpcore2"):
  returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="mechanize"):
  returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl"):
  returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl2"):
  returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl3"):
  returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="ftp"):
  returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="sftp"):
  returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pysftp"):
  returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
 else:
  returnval = False;
 return returnval;
def download_from_url_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
 """Run download_from_url_file for each URL and return the list of
 results.  httpurl may be a single URL, a list/tuple, or a dict whose
 values are URLs.
 """
 if(isinstance(httpurl, dict)):
  # BUGFIX: dict.values() is a non-indexable view on Python 3; the old
  # code indexed it directly and crashed. Materialize it as a list.
  httpurl = list(httpurl.values());
 elif(not isinstance(httpurl, (list, tuple))):
  httpurl = [httpurl];
 returnval = [];
 for thisurl in httpurl:
  returnval.append(download_from_url_file(thisurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize, sleep, timeout));
 return returnval;
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
 """Download httpurl to outpath/outfile (or to stdout when outfile is
 "-") using the backend named by httplibuse, returning the result dict
 of the chosen download_from_url_to_file_with_* helper (False for an
 unknown backend).  Unavailable backends silently fall back, usually
 to "urllib" ("ftp" for the SSH-based backends).
 """
 global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 # Normalize legacy aliases first; "request" is an alias for "urllib".
 if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
  httplibuse = "urllib";
 if(httplibuse=="httplib1"):
  httplibuse = "httplib";
 # Fall back when the requested backend's module is not importable.
 if(not haverequests and httplibuse=="requests"):
  httplibuse = "urllib";
 if(not haveaiohttp and httplibuse=="aiohttp"):
  httplibuse = "urllib";
 if(not havehttpx and (httplibuse=="httpx" or httplibuse=="httpx2")):
  httplibuse = "urllib";
 if(not havehttpcore and (httplibuse=="httpcore" or httplibuse=="httpcore2")):
  httplibuse = "urllib";
 if(not havemechanize and httplibuse=="mechanize"):
  httplibuse = "urllib";
 if(not havepycurl and (httplibuse=="pycurl" or httplibuse=="pycurl2" or httplibuse=="pycurl3")):
  httplibuse = "urllib";
 # Downgrade pycurl HTTP/3 and HTTP/2 requests to what libcurl supports.
 if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
  httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl";
 if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
  httplibuse = "pycurl";
 if(not havehttplib2 and httplibuse=="httplib2"):
  httplibuse = "httplib";
 if(not haveparamiko and httplibuse=="sftp"):
  httplibuse = "ftp";
 if(not havepysftp and httplibuse=="pysftp"):
  httplibuse = "ftp";
 # Dispatch.  BUGFIX(review): the httpx/httpx2/httpcore/httpcore2 branches
 # previously omitted outfile/outpath, unlike every other *_to_file_with_*
 # call (apparent copy-paste from the non-"to_file" variant) — confirm the
 # helpers' signatures match the other backends.
 if(httplibuse=="urllib"):
  returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="request3"):
  returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httplib"):
  returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httplib2"):
  returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="urllib3"):
  returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="requests"):
  returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="aiohttp"):
  returnval = download_from_url_to_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpx"):
  returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpx2"):
  returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpcore"):
  returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="httpcore2"):
  returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="mechanize"):
  returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl"):
  returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl2"):
  returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pycurl3"):
  returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="ftp"):
  returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="sftp"):
  returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 elif(httplibuse=="pysftp"):
  returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
 else:
  returnval = False;
 return returnval;
def download_from_url_to_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
 """Run download_from_url_to_file for each URL and return the list of
 results.  httpurl may be a single URL, a list/tuple, or a dict whose
 values are URLs.
 """
 if(isinstance(httpurl, dict)):
  # BUGFIX: dict.values() is a non-indexable view on Python 3; the old
  # code indexed it directly and crashed. Materialize it as a list.
  httpurl = list(httpurl.values());
 elif(not isinstance(httpurl, (list, tuple))):
  httpurl = [httpurl];
 returnval = [];
 for thisurl in httpurl:
  returnval.append(download_from_url_to_file(thisurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, outfile, outpath, ranges, buffersize, sleep, timeout));
 return returnval;
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
 """Download httpurl into memory with urllib.

 Returns a result dict (Type/Content/Contentsize/Headers/Version/
 Method/HeadersSent/URL/Code/Reason/HTTPLib) or False when the request
 fails with a URL or socket-timeout error.  HTTP error responses are
 not treated as failures: their body is read and returned.
 """
 global geturls_download_sleep, havezstd, havebrotli;
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 urlparts = urlparse.urlparse(httpurl);
 if(isinstance(httpheaders, list)):
  httpheaders = make_http_headers_from_list_to_dict(httpheaders);
 httpheaders = fix_header_names(httpheaders);
 # BUGFIX: the old fallback branches called .update() on the
 # httpuseragent/httpreferer strings instead of the httpheaders dict,
 # raising AttributeError whenever the header was not already present.
 if(httpuseragent is not None):
  httpheaders['User-Agent'] = httpuseragent;
 if(httpreferer is not None):
  httpheaders['Referer'] = httpreferer;
 if(urlparts.username is not None or urlparts.password is not None):
  # Either half of the credentials may be absent; treat it as empty
  # instead of raising TypeError on None concatenation.
  inurlcred = str((urlparts.username or "")+":"+(urlparts.password or ""));
  if(sys.version[0]=="2"):
   inurlencode = b64encode(inurlcred);
  if(sys.version[0]>="3"):
   inurlencode = b64encode(inurlcred.encode()).decode("UTF-8");
  httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
 if(isinstance(httpheaders, dict)):
  httpheaders = make_http_headers_from_dict_to_list(httpheaders);
 geturls_opener.addheaders = httpheaders;
 time.sleep(sleep);
 # NOTE(review): dict postdata is passed through unencoded here —
 # confirm callers pre-encode dict payloads before reaching this point.
 if(postdata is not None and not isinstance(postdata, dict)):
  postdata = urlencode(postdata);
 try:
  geturls_request = Request(httpurl);
  if(httpmethod=="POST"):
   geturls_text = geturls_opener.open(geturls_request, data=postdata);
  else:
   # GET and any other method are opened without a body.
   geturls_text = geturls_opener.open(geturls_request);
 except HTTPError as geturls_text_error:
  # HTTP error responses still carry a readable body; keep going.
  geturls_text = geturls_text_error;
  log.info("Error With URL "+httpurl);
 except URLError:
  log.info("Error With URL "+httpurl);
  return False;
 except socket.timeout:
  log.info("Error With URL "+httpurl);
  return False;
 httpcodeout = geturls_text.getcode();
 try:
  httpcodereason = geturls_text.reason;
 except AttributeError:
  httpcodereason = http_status_to_reason(geturls_text.getcode());
 try:
  httpversionout = geturls_text.version;
 except AttributeError:
  httpversionout = "1.1";
 httpmethodout = geturls_request.get_method();
 httpurlout = geturls_text.geturl();
 httpheaderout = geturls_text.info();
 httpheadersentout = httpheaders;
 if(isinstance(httpheaderout, list)):
  httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
 httpheaderout = fix_header_names(httpheaderout);
 if(sys.version[0]=="2"):
  # Python 2's message object is not a plain dict; rebuild one from its
  # keys (objects without .keys() are left untouched).
  try:
   httpheaderout = dict((hkey, httpheaderout[hkey]) for hkey in httpheaderout.keys());
  except AttributeError:
   pass;
 if(isinstance(httpheadersentout, list)):
  httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
 httpheadersentout = fix_header_names(httpheadersentout);
 downloadsize = httpheaderout.get('Content-Length');
 downloadsize = int(downloadsize) if downloadsize is not None else 0;
 fulldatasize = 0;
 prevdownsize = 0;
 log.info("Downloading URL "+httpurl);
 with BytesIO() as strbuf:
  while True:
   databytes = geturls_text.read(buffersize);
   if not databytes: break;
   datasize = len(databytes);
   fulldatasize = datasize + fulldatasize;
   percentage = "";
   if(downloadsize>0):
    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
   downloaddiff = fulldatasize - prevdownsize;
   log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
   prevdownsize = fulldatasize;
   strbuf.write(databytes);
  strbuf.seek(0);
  returnval_content = strbuf.read();
 # Best-effort transparent decode of the body; undecodable data is
 # returned as-is, matching the other backends in this file.
 contentencoding = httpheaderout.get("Content-Encoding");
 if(contentencoding=="gzip"):
  try:
   returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
  except zlib.error:
   pass;
 elif(contentencoding=="deflate"):
  try:
   returnval_content = zlib.decompress(returnval_content);
  except zlib.error:
   pass;
 elif(contentencoding=="br" and havebrotli):
  try:
   returnval_content = brotli.decompress(returnval_content);
  except brotli.error:
   pass;
 elif(contentencoding=="zstd" and havezstd):
  try:
   returnval_content = zstandard.decompress(returnval_content);
  except zstandard.error:
   pass;
 elif((contentencoding=="lzma" or contentencoding=="xz") and havelzma):
  # BUGFIX: previously caught zstandard.error around lzma.decompress.
  try:
   returnval_content = lzma.decompress(returnval_content);
  except lzma.LZMAError:
   pass;
 elif(contentencoding=="bzip2"):
  # BUGFIX: previously caught zstandard.error around bz2.decompress;
  # bz2 raises OSError/IOError/ValueError on bad data.
  try:
   returnval_content = bz2.decompress(returnval_content);
  except (OSError, IOError, ValueError):
   pass;
 returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"};
 geturls_text.close();
 return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
 """Download httpurl with urllib into a uniquely named temporary file.

 Returns a result dict with Type "File" and the temp file's path, or
 False when the in-memory download failed.  The "ranges" parameter is
 accepted for interface parity with the other backends but is unused.
 """
 global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
 exec_time_start = time.time();
 # Hash of URL + buffer size + start time makes the temp name unique.
 myhash = hashlib.new("sha1");
 if(sys.version[0]=="2"):
  myhash.update(httpurl);
  myhash.update(str(buffersize));
  myhash.update(str(exec_time_start));
 if(sys.version[0]>="3"):
  myhash.update(httpurl.encode('utf-8'));
  myhash.update(str(buffersize).encode('utf-8'));
  myhash.update(str(exec_time_start).encode('utf-8'));
 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 if(not pretmpfilename):
  return False;
 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
  tmpfilename = f.name;
  f.write(pretmpfilename.get('Content'));
 # BUGFIX: apply the Last-Modified timestamp AFTER writing; the old code
 # called os.utime() first and the subsequent write/close reset the mtime.
 # TypeError is also caught now so a missing Last-Modified header (None)
 # no longer crashes the download.
 try:
  os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
 except (AttributeError, TypeError):
  try:
   os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
  except (ValueError, TypeError):
   pass;
 except ValueError:
  pass;
 exec_time_end = time.time();
 # BUGFIX: elapsed time is end - start (it was negated before).
 log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
 returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
 return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with urllib and deliver the result to the caller.

    When outfile is "-" the downloaded data is returned in memory as a
    "Content" result dict; otherwise the temporary download is moved to
    outpath/outfile and a "File" result dict is returned.  Returns False
    on any failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server-reported Last-Modified time on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is unavailable (Python 2); fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time was previously computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice; the second entry
        # (httpmethod) silently won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # BUGFIX: a failed download previously crashed on False.get('Filename').
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib and return a "Content" result dict.

    Returns False when the URL scheme is unsupported or the connection
    fails.  The response body is transparently decompressed for
    gzip/deflate, plus brotli, zstd, lzma/xz and bzip2 when the matching
    module is available.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: previously called httpuseragent.update(...), which
            # modified the wrong object and left the header unset.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same wrong-object update as above for Referer.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: the POST branch previously issued a GET request.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            # BUGFIX: other verbs were previously forced to GET.
            httpconn.request(httpmethod, urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except BlockingIOError:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    if(geturls_text.version=="10"):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = geturls_text._method;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2 header objects are copied key by key into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Best-effort transparent decompression; failures leave the raw body.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # BUGFIX: previously "except zstandard.error" — the wrong exception
        # type, and a NameError when zstandard is not installed.
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # BUGFIX: previously "except zstandard.error"; bz2 raises OSError/ValueError.
        except (IOError, OSError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib into a uniquely named temporary file.

    Returns a "File" result dict describing the temporary file, or False
    on failure.  The caller is responsible for removing the file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # BUGFIX: previously delegated to download_from_url_with_urllib, so the
    # httplib implementation this wrapper belongs to was never used.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
        f.close();
        # BUGFIX: the timestamps are now applied AFTER writing; previously
        # os.utime ran first and the subsequent write reset the mtime.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is unavailable (Python 2); fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
    exec_time_end = time.time();
    # BUGFIX: elapsed time was previously computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with httplib and deliver the result to the caller.

    When outfile is "-" the downloaded data is returned in memory as a
    "Content" result dict; otherwise the temporary download is moved to
    outpath/outfile and a "File" result dict is returned.  Returns False
    on any failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server-reported Last-Modified time on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is unavailable (Python 2); fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time was previously computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice; the second entry
        # (httpmethod) silently won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # BUGFIX: a failed download previously crashed on False.get('Filename').
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with httplib2's timeout-aware connections.

        Returns a "Content" result dict, or False when the URL scheme is
        unsupported or the connection fails.  The response body is
        transparently decompressed for gzip/deflate, plus brotli, zstd,
        lzma/xz and bzip2 when the matching module is available.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: previously called httpuseragent.update(...), which
                # modified the wrong object and left the header unset.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same wrong-object update as above for Referer.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline URL credentials become an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(urlparts[0]=="http"):
            httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout);
        else:
            return False;
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                # BUGFIX: the POST branch previously issued a GET request.
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                # BUGFIX: other verbs were previously forced to GET.
                httpconn.request(httpmethod, urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except BlockingIOError:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = httpurl;
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects are copied key by key into a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort transparent decompression; failures leave the raw body.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # BUGFIX: previously "except zstandard.error" — the wrong exception
            # type, and a NameError when zstandard is not installed.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # BUGFIX: previously "except zstandard.error"; bz2 raises OSError/ValueError.
            except (IOError, OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"};
        geturls_text.close();
        return returnval;
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is missing: delegate to the urllib backend."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with httplib2 into a uniquely named temporary file.

        Returns a "File" result dict describing the temporary file, or False
        on failure.  The caller is responsible for removing the file.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
            # BUGFIX: the timestamps are now applied AFTER writing; previously
            # os.utime ran first and the subsequent write reset the mtime.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # parsedate_to_datetime is unavailable (Python 2); fall back to strptime.
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time was previously computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is missing: delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with httplib2 and deliver the result to the caller.

        When outfile is "-" the downloaded data is returned in memory as a
        "Content" result dict; otherwise the temporary download is moved to
        outpath/outfile and a "File" result dict is returned.  Returns False
        on any failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server-reported Last-Modified time on the moved file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # parsedate_to_datetime is unavailable (Python 2); fall back to strptime.
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time was previously computed as start - end (negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict previously listed 'Method' twice; the second entry
            # (httpmethod) silently won, so only that one is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: a failed download previously crashed on False.get('Filename').
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httplib2 is not installed: delegate straight to the
        urllib implementation with the same arguments.
        NOTE(review): 'ranges' is accepted but not forwarded here — confirm the
        urllib variant's parameter order before threading it through."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Compatibility alias: "request" backend is served by the urllib implementation."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Compatibility alias: download-to-temp-file via the urllib implementation."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Compatibility alias: download-to-named-file via the urllib implementation.
    NOTE(review): 'ranges' is accepted but not forwarded — matches the other
    to-file aliases in this module; confirm the urllib variant's signature."""
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with the requests library.

        Returns a dict with the decoded body ('Content'), its size, received and
        sent headers, HTTP version/method/URL/status, and 'HTTPLib': "requests".
        Returns False on connection errors or timeouts.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        # BUGFIX: the else-branches previously called httpuseragent.update(...)
        # (an AttributeError on a str, and the wrong target dict anyway);
        # both cases reduce to a plain header assignment.
        if(httpuseragent is not None):
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline URL credentials become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            reqsession = requests.Session();
            # BUGFIX: the computed timeout was never passed to requests before.
            if(httpmethod=="POST"):
                geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout);
            else:
                # Any non-POST method falls back to GET, as before.
                geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout);
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ConnectionError, socket.timeout):
            # BUGFIX: requests.exceptions.ConnectError does not exist; the real
            # class is ConnectionError.
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason;
        # BUGFIX: urllib3's raw.version is the int 10/11, not the string "10",
        # so the old string comparison always reported "1.1".
        if(geturls_text.raw.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # On Python 2, materialize the case-insensitive header mapping into
            # a plain dict (replaces the old manual key-copy loop).
            try:
                httpheaderout = dict(httpheaderout);
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        # Stream the raw body in buffersize chunks, logging progress.
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.raw.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently decode the body according to Content-Encoding; on a
        # decode failure the raw bytes are returned unchanged (best effort).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.ZstdError:
                # BUGFIX: zstandard exposes ZstdError, not 'error'.
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was 'except zstandard.error' (wrong module).
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUGFIX: was 'except zstandard.error'; bz2 raises OSError/ValueError.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"};
        geturls_text.close();
        return returnval;
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when requests is not installed: delegate to urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via requests into a uniquely-named temporary file.

        Returns a dict ('Type': "File") with the temp 'Filename', on-disk size,
        download timing, and the response metadata from
        download_from_url_with_requests; False on download failure.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: the mtime is now set AFTER the content is written and the file
        # closed; previously os.utime ran before f.write, so the write clobbered
        # the Last-Modified timestamp.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Header missing or parsedate_to_datetime unavailable; try strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: duration was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when requests is not installed: delegate to urllib."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via requests to outpath/outfile, or return the content
        in-memory when outfile is "-".

        buffersize is a [download, copy] pair of chunk sizes. Returns a result
        dict ('Type': "File" or "Content") or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Mirror the server's Last-Modified time onto the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: duration was computed as start - end (always negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict previously listed 'Method' twice; the effective
            # (last) value httpmethod is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                # BUGFIX: this branch previously had no failure check and would
                # crash on .get() when the download returned False.
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1] chunks.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when requests is not installed: delegate to urllib.
        NOTE(review): 'ranges' is accepted but not forwarded — matches the other
        to-file fallbacks in this module."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with aiohttp and return the same result dict shape as
        the other download_from_url_with_* backends ('HTTPLib': "aiohttp"), or
        False on connection errors/timeouts.

        NOTE(review): aiohttp's ClientSession.get/post are coroutines; this
        function drives them synchronously like the requests backend does, which
        only works if a wrapper elsewhere runs it in an event loop — confirm.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        # BUGFIX: the else-branches previously called httpuseragent.update(...)
        # (an AttributeError on a str, and the wrong target dict anyway).
        if(httpuseragent is not None):
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline URL credentials become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize);
            if(httpmethod=="POST"):
                geturls_text = reqsession.post(httpurl, data=postdata);
            else:
                # Any non-POST method falls back to GET, as before.
                geturls_text = reqsession.get(httpurl);
        except (aiohttp.ClientError, socket.timeout):
            # BUGFIX: aiohttp has no 'exceptions' submodule; ClientError is the
            # base class covering connection errors and timeouts.
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        httpversionout = geturls_text.version;
        httpmethodout = geturls_text.method;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request_info.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # On Python 2, materialize the header mapping into a plain dict
            # (replaces the old manual key-copy loop).
            try:
                httpheaderout = dict(httpheaderout);
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        # Stream the body in buffersize chunks, logging progress.
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently decode the body according to Content-Encoding; on a
        # decode failure the raw bytes are returned unchanged (best effort).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.ZstdError:
                # BUGFIX: zstandard exposes ZstdError, not 'error'.
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was 'except zstandard.error' (wrong module).
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUGFIX: was 'except zstandard.error'; bz2 raises OSError/ValueError.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"};
        geturls_text.close();
        return returnval;
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is not installed: delegate to urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via aiohttp into a uniquely-named temporary file.

        Returns a dict ('Type': "File") with the temp 'Filename', on-disk size,
        download timing, and the response metadata from
        download_from_url_with_aiohttp; False on download failure.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: the mtime is now set AFTER the content is written and the file
        # closed; previously os.utime ran before f.write, so the write clobbered
        # the Last-Modified timestamp.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Header missing or parsedate_to_datetime unavailable; try strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: duration was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is not installed: delegate to urllib."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via aiohttp to outpath/outfile, or return the content
        in-memory when outfile is "-".

        buffersize is a [download, copy] pair of chunk sizes. Returns a result
        dict ('Type': "File" or "Content") or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Mirror the server's Last-Modified time onto the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: duration was computed as start - end (always negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict previously listed 'Method' twice; the effective
            # (last) value httpmethod is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                # BUGFIX: this branch previously had no failure check and would
                # crash on .get() when the download returned False.
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1] chunks.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when aiohttp is not installed: delegate the whole
        download-to-file operation to the urllib backend unchanged."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with httpx over HTTP/1.1 and return a result dict with
        keys 'Type' ("Content"), 'Content' (decoded body bytes), 'Contentsize',
        'ContentsizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL',
        'Code', 'Reason' and 'HTTPLib' ("httpx"); returns False on connection
        or timeout errors.
        httpheaders may be a header list or dict; postdata is form-encoded when
        it is not already a dict; sleep<0 uses the module default and
        timeout<=0 falls back to 10 seconds."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            # BUGFIX: always store the override in httpheaders; the old code
            # called httpuseragent.update(...) on the string when the header
            # was absent, raising AttributeError.
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            # BUGFIX: same as above for the Referer header.
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become an HTTP Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            if(httpmethod=="POST"):
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                # Any method other than POST is issued as a GET, as before.
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        try:
            httpcodereason = geturls_text.reason_phrase;
        except AttributeError:
            httpcodereason = http_status_to_reason(geturls_text.status_code);
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2: coerce the header mapping into a plain dict copy.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        downloadsize = int(downloadsize) if downloadsize is not None else 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            # httpx Response.read() returns the entire body, so one pass
            # suffices (the old while-loop broke after one iteration anyway).
            databytes = geturls_text.read();
            if(databytes):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Close the client pool too; the old code leaked the connection pool.
        httpx_pool.close();
        contentencoding = httpheaderout.get("Content-Encoding");
        if(contentencoding=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(contentencoding=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(contentencoding=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(contentencoding=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((contentencoding=="lzma" or contentencoding=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was "except zstandard.error" (wrong module; NameError
                # when zstandard is not installed).
                pass;
        elif(contentencoding=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUGFIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"};
        return returnval;
if(not havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpx is not installed: delegate to the urllib
        backend with the same arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_httpx and spool the body
        into a uniquely named temporary file.
        Returns a 'File' result dict (Filename, Filesize, DownloadTime,
        headers, status, ...) or False when the underlying download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Mirror the server's Last-Modified stamp onto the temp file;
                # fall back to manual parsing when parsedate_to_datetime is
                # unavailable, and ignore malformed header values.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpx is not installed: delegate the
        download-to-temp-file operation to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttpx):
    def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl to outpath/outfile, or — when outfile is "-" —
        return the body in memory as a 'Content' result dict.
        Returns a 'File'/'Content' result dict, or False on download failure
        or when the destination path is unusable."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Preserve the server's Last-Modified stamp on the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the original dict literal had a duplicate 'Method' key;
            # the later value (httpmethod) won, so that one is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: guard against a failed download before touching the file.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpx):
    def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpx is not installed: delegate the whole
        download-to-file operation to the urllib backend unchanged."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havehttpx):
    def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with httpx with HTTP/2 enabled and return a result
        dict with keys 'Type' ("Content"), 'Content' (decoded body bytes),
        'Contentsize', 'ContentsizeAlt', 'Headers', 'Version', 'Method',
        'HeadersSent', 'URL', 'Code', 'Reason' and 'HTTPLib' ("httpx2");
        returns False on connection or timeout errors.
        httpheaders may be a header list or dict; postdata is form-encoded when
        it is not already a dict; sleep<0 uses the module default and
        timeout<=0 falls back to 10 seconds."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            # BUGFIX: always store the override in httpheaders; the old code
            # called httpuseragent.update(...) on the string when the header
            # was absent, raising AttributeError.
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            # BUGFIX: same as above for the Referer header.
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become an HTTP Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            if(httpmethod=="POST"):
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                # Any method other than POST is issued as a GET, as before.
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        try:
            httpcodereason = geturls_text.reason_phrase;
        except AttributeError:
            httpcodereason = http_status_to_reason(geturls_text.status_code);
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2: coerce the header mapping into a plain dict copy.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        downloadsize = int(downloadsize) if downloadsize is not None else 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            # httpx Response.read() returns the entire body, so one pass
            # suffices (the old while-loop broke after one iteration anyway).
            databytes = geturls_text.read();
            if(databytes):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Close the client pool too; the old code leaked the connection pool.
        httpx_pool.close();
        contentencoding = httpheaderout.get("Content-Encoding");
        if(contentencoding=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(contentencoding=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(contentencoding=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(contentencoding=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((contentencoding=="lzma" or contentencoding=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was "except zstandard.error" (wrong module; NameError
                # when zstandard is not installed).
                pass;
        elif(contentencoding=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUGFIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"};
        return returnval;
if(not havehttpx):
    def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpx is not installed: delegate to the urllib
        backend with the same arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttpx):
    def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_httpx2 (HTTP/2 enabled)
        and spool the body into a uniquely named temporary file.
        Returns a 'File' result dict (Filename, Filesize, DownloadTime,
        headers, status, ...) or False when the underlying download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Mirror the server's Last-Modified stamp onto the temp file;
                # fall back to manual parsing when parsedate_to_datetime is
                # unavailable, and ignore malformed header values.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpx is not installed: delegate the
        download-to-temp-file operation to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttpx):
    def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl (httpx with HTTP/2 enabled) to outpath/outfile, or
        — when outfile is "-" — return the body in memory as a 'Content'
        result dict.
        Returns a 'File'/'Content' result dict, or False on download failure
        or when the destination path is unusable."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Preserve the server's Last-Modified stamp on the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the original dict literal had a duplicate 'Method' key;
            # the later value (httpmethod) won, so that one is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: guard against a failed download before touching the file.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpx):
    def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpx is not installed: delegate the whole
        download-to-file operation to the urllib backend unchanged."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with httpcore (HTTP/1.1 only) and return a result dict.

        Returns a dict with 'Type', 'Content', 'Contentsize', 'Headers',
        'Code', 'Reason', etc. on success, or False on connect/timeout errors.
        Basic-auth credentials embedded in the URL are sent as an
        Authorization header; common Content-Encodings are decompressed.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update(...) (a str has no .update);
                # update the headers dict so the User-Agent is actually sent.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: was httpuseragent.update(...); same fix as above.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                # BUGFIX: send a real POST (was "GET", which dropped postdata).
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects are not plain dicts; copy them key by key.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
                # httpcore's read() returns the whole body at once, so one pass.
                break;
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            # BUGFIX: zstandard's exception class is ZstdError; zstandard.error
            # does not exist and would itself raise AttributeError.
            except zstandard.ZstdError:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # BUGFIX: was zstandard.error; lzma raises LZMAError.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # BUGFIX: was zstandard.error; bz2 raises OSError/ValueError.
            except (OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"};
        # NOTE: response already closed above; the original closed it twice.
        return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttpcore):
    # CONSISTENCY: added the same keyword defaults every sibling variant has
    # (the fallback at the "not havehttpcore" branch already declares them);
    # positional callers are unaffected.
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpcore into a uniquely-named temporary file.

        Returns a dict describing the temp file ('Filename', 'Filesize',
        'Headers', 'DownloadTime', ...) or False if the download failed.
        The caller is responsible for deleting the temp file (delete=False).
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffersize + start time into a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Propagate the server's Last-Modified time onto the temp file.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Refresh Filesize with the on-disk size and record the elapsed time.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via httpcore and save it as outpath/outfile.

        When outfile is "-", the content is returned in the result dict
        instead of being written to a destination file. Returns a result
        dict on success or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Propagate the server's Last-Modified time onto the saved file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict literal listed 'Method' twice; only the later
            # httpmethod value survived, so keep that single entry.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1] chunks, logging progress.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
                ft.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # BUGFIX: deduplicated the 'Method' key here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with httpcore (HTTP/1.1 + HTTP/2) and return a result dict.

        Same contract as download_from_url_with_httpcore, but the connection
        pool is created with http2=True. Returns a result dict on success, or
        False on connect/timeout errors.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update(...) (a str has no .update);
                # update the headers dict so the User-Agent is actually sent.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: was httpuseragent.update(...); same fix as above.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                # BUGFIX: send a real POST (was "GET", which dropped postdata).
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects are not plain dicts; copy them key by key.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
                # httpcore's read() returns the whole body at once, so one pass.
                break;
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            # BUGFIX: zstandard's exception class is ZstdError, not .error.
            except zstandard.ZstdError:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # BUGFIX: was zstandard.error; lzma raises LZMAError.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # BUGFIX: was zstandard.error; bz2 raises OSError/ValueError.
            except (OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"};
        # NOTE: response already closed above; the original closed it twice.
        return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttpcore):
    # CONSISTENCY: added the same keyword defaults every sibling variant has
    # (the fallback at the "not havehttpcore" branch already declares them);
    # positional callers are unaffected.
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpcore (HTTP/2 capable) into a temp file.

        Returns a dict describing the temp file ('Filename', 'Filesize',
        'Headers', 'DownloadTime', ...) or False if the download failed.
        The caller is responsible for deleting the temp file (delete=False).
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffersize + start time into a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Propagate the server's Last-Modified time onto the temp file.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Refresh Filesize with the on-disk size and record the elapsed time.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via httpcore (HTTP/2 capable) and save it as outpath/outfile.

        When outfile is "-", the content is returned in the result dict
        instead of being written to a destination file. Returns a result
        dict on success or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Propagate the server's Last-Modified time onto the saved file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict literal listed 'Method' twice; only the later
            # httpmethod value survived, so keep that single entry.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1] chunks, logging progress.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
                ft.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # BUGFIX: deduplicated the 'Method' key here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
# BUGFIX: this fallback was gated on "not havehttpx" (copy-paste from the
# httpx fallbacks); it pairs with the httpcore-backed definition above, so it
# must be gated on "not havehttpcore" like every other httpcore fallback.
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Alias for the urllib3-backed implementation."""
        return download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when urllib3 is unavailable: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Alias: the "request3" to-tempfile name resolves to the urllib3 backend."""
        return download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Alias: the "request3" to-file name resolves to the urllib3 backend.

        BUGFIX: download_from_url_to_file_with_urllib3 is declared as
        (..., postdata, outfile, outpath, ranges, buffersize, sleep, timeout),
        but the old call passed (..., postdata, buffersize, outfile, outpath,
        sleep, timeout), shifting buffersize into the outfile slot and dropping
        ranges entirely.  Arguments are now forwarded in the declared order.
        """
        return download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib backend.

        NOTE(review): arguments are forwarded as (postdata, buffersize, outfile,
        outpath, ...) with `ranges` dropped -- confirm this matches
        download_from_url_to_file_with_urllib's declared parameter order.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl into memory using the urllib3 backend.

        Returns a dict with keys Type ("Content"), Content (body bytes after
        best-effort Content-Encoding decoding), Contentsize, ContentsizeAlt,
        Headers, Version, Method, HeadersSent, URL, Code, Reason and HTTPLib
        ("urllib3"); returns False on connection/timeout/URL errors.
        postdata is urlencoded unless it is already a dict.  httpcookie is
        accepted for API symmetry but not used by this backend.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update({...}) -- calling .update on
                # the user-agent string itself; the header dict must be updated.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same as above -- update the header dict, not the string.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
        urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        # BUGFIX: urllib3 reports the HTTP version as the int 10 or 11, so the
        # old comparison against the string "10" could never match and HTTP/1.0
        # responses were always reported as "1.1".  Normalize through str().
        if(str(geturls_text.version)=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects are not always plain dicts; copy key by key.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        # Stream the body in buffersize chunks, logging progress as we go.
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort transparent decode of the advertised Content-Encoding;
        # every branch keeps the raw bytes when decompression fails.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # BUGFIX: was "except zstandard.error", which never matches lzma
            # failures and raises NameError when zstandard is not installed.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # BUGFIX: was "except zstandard.error" (see above); bz2 raises
            # IOError/OSError or ValueError on bad data.
            except (IOError, OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"};
        geturls_text.close();
        return returnval;
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib backend."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_urllib3 and spool the
        body to a uniquely named temporary file (kept on disk: delete=False).

        Returns a dict with Type "File", the temp Filename, Filesize(+Alt),
        the response metadata forwarded from the in-memory download, plus
        DownloadTime fields; returns False when the download failed.

        NOTE(review): `ranges` is accepted but not forwarded to the downloader.
        NOTE(review): DownloadTime is exec_time_start - exec_time_end (a
        negative value) -- this matches the convention used throughout this
        file; presumably hms_string() compensates -- confirm.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffer size + start time into a unique tempfile suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            # Python 3 hashlib requires bytes input.
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        # Fetch the whole body into memory first.
        pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Mirror the server's Last-Modified timestamp onto the temp file.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    # Fallback when parsedate_to_datetime is unavailable or fails:
                    # parse the RFC 1123 date string manually.
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Re-stat the written file so Filesize reflects the bytes on disk.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via urllib3 to outpath/outfile, or into memory when
        outfile=="-".

        Returns a dict (Type "File" with the final path, or Type "Content"
        with the bytes) carrying the forwarded response metadata plus
        DownloadTime/MoveFileTime fields; returns False on failure or when
        the destination is invalid.  buffersize is [download, copy] chunk sizes.

        Fixes over the previous revision: the returnval dicts listed the
        'Method' key twice (the literal httpmethod silently won; it is now
        stated once), and the outfile=="-" branch now checks for a failed
        download instead of raising AttributeError on False.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            # Download to a temp file, then move it into the requested location.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Mirror the server's Last-Modified timestamp onto the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            # Download to a temp file, copy its bytes into memory, then delete it.
            pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
                ft.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib backend.

        NOTE(review): arguments are forwarded as (postdata, buffersize, outfile,
        outpath, ...) with `ranges` dropped -- confirm this matches
        download_from_url_to_file_with_urllib's declared parameter order.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl into memory using a mechanize.Browser.

        Returns a dict with keys Type ("Content"), Content (body bytes after
        best-effort Content-Encoding decoding), Contentsize, ContentsizeAlt,
        Headers, Version (fixed "1.1"), Method, HeadersSent, URL, Code,
        Reason and HTTPLib ("mechanize"); returns False on URL/timeout
        errors.  HTTP error responses are still processed (mechanize's
        HTTPError doubles as a response object).
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update({...}) -- calling .update on
                # the user-agent string itself; the header dict must be updated.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same as above -- update the header dict, not the string.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            # mechanize expects addheaders as a list of (name, value) pairs.
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            # HTTP error responses still carry a body; keep processing them.
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpcodereason = geturls_text.msg;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        reqhead = geturls_opener.request;
        httpheadersentout = reqhead.header_items();
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects are not always plain dicts; copy key by key.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        # Stream the body in buffersize chunks, logging progress as we go.
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort transparent decode of the advertised Content-Encoding;
        # every branch keeps the raw bytes when decompression fails.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # BUGFIX: was "except zstandard.error", which never matches lzma
            # failures and raises NameError when zstandard is not installed.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # BUGFIX: was "except zstandard.error" (see above); bz2 raises
            # IOError/OSError or ValueError on bad data.
            except (IOError, OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"};
        geturls_text.close();
        return returnval;
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the urllib backend."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_mechanize and spool the
        body to a uniquely named temporary file (kept on disk: delete=False).

        Returns a dict with Type "File", the temp Filename, Filesize(+Alt),
        the response metadata forwarded from the in-memory download, plus
        DownloadTime fields; returns False when the download failed.

        NOTE(review): `ranges` is accepted but not forwarded to the downloader.
        NOTE(review): DownloadTime is exec_time_start - exec_time_end (a
        negative value) -- this matches the convention used throughout this
        file; presumably hms_string() compensates -- confirm.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffer size + start time into a unique tempfile suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            # Python 3 hashlib requires bytes input.
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        # Fetch the whole body into memory first.
        pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Mirror the server's Last-Modified timestamp onto the temp file.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    # Fallback when parsedate_to_datetime is unavailable or fails:
                    # parse the RFC 1123 date string manually.
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Re-stat the written file so Filesize reflects the bytes on disk.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via mechanize to outpath/outfile, or into memory
        when outfile=="-".

        Returns a dict (Type "File" with the final path, or Type "Content"
        with the bytes) carrying the forwarded response metadata plus
        DownloadTime/MoveFileTime fields; returns False on failure or when
        the destination is invalid.  buffersize is [download, copy] chunk sizes.

        Fixes over the previous revision: the returnval dicts listed the
        'Method' key twice (now stated once), the Content branch stored the
        literal list ['HeadersSent'] instead of the forwarded headers, and
        the outfile=="-" branch now checks for a failed download instead of
        raising AttributeError on False.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            # Download to a temp file, then move it into the requested location.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Mirror the server's Last-Modified timestamp onto the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            # Download to a temp file, copy its bytes into memory, then delete it.
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
                ft.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # mechanize is unavailable; delegate straight to the urllib implementation.
        # NOTE(review): 'ranges' is dropped and buffersize precedes outfile in this
        # call — confirm against download_from_url_to_file_with_urllib's signature.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        '''
        Fetch httpurl with pycurl and return a result dict with the keys
        Type ("Content"), Content, Contentsize, ContentsizeAlt, Headers,
        Version, Method, HeadersSent, URL, Code, Reason and HTTPLib.
        Returns False on socket timeout / DNS / URL errors.
        '''
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update(...), which fails because
                # httpuseragent is a string; the headers dict must be updated.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same as above — was httpuseragent.update(...).
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become an HTTP Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        # NOTE(review): this opener is never used by the pycurl transfer below;
        # kept for parity with the sibling implementations in this file.
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            # The GET and fall-through branches were identical and POST only
            # added two options, so the three copies are merged into one setup.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            if(httpmethod=="POST"):
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # Status line, e.g. "HTTP/1.1 200 OK" -> (version, code, reason).
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the dict so plain str keys are used.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        # Copy the buffered body in buffersize chunks, logging progress.
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding we recognize; on a decode
        # failure the raw body is returned unchanged.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was "except zstandard.error" — wrong module.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (IOError, OSError, ValueError):
                # BUGFIX: was "except zstandard.error"; bz2 raises
                # IOError/OSError (bad data) or ValueError instead.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"};
        geturls_text.close();
        return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # pycurl is unavailable; fall back to the urllib implementation.
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        '''
        Download httpurl via download_from_url_with_pycurl and spool the body
        into a uniquely-named temporary file. Returns a result dict whose
        'Filename' points at that file (caller is responsible for deleting
        it), or False when the underlying download failed.
        '''
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: apply the Last-Modified mtime AFTER writing the file; the
        # original called os.utime() before f.write(), so the write clobbered it.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fall back to strptime when parsedate_to_datetime is unavailable.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start; the original produced negatives.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # pycurl is unavailable; fall back to the urllib implementation.
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl):
    def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        '''
        Download httpurl to outpath/outfile (moved there from a temp file).
        When outfile is "-", the body is returned in-memory under 'Content'
        instead. Returns a result dict, or False on failure.
        '''
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp on the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start; the original logged negatives.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict literal listed 'Method' twice; only the effective
            # value (httpmethod) is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # ROBUSTNESS: the original skipped this check in the "-" branch.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1] chunks.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # pycurl is unavailable; delegate straight to the urllib implementation.
        # NOTE(review): 'ranges' is dropped and buffersize precedes outfile in this
        # call — confirm against download_from_url_to_file_with_urllib's signature.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        '''
        Fetch httpurl with pycurl over HTTP/2 (CURL_HTTP_VERSION_2_0) and
        return a result dict with the keys Type ("Content"), Content,
        Contentsize, ContentsizeAlt, Headers, Version, Method, HeadersSent,
        URL, Code, Reason and HTTPLib. Returns False on socket/URL errors.
        '''
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update(...), which fails because
                # httpuseragent is a string; the headers dict must be updated.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same as above — was httpuseragent.update(...).
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become an HTTP Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        # NOTE(review): this opener is never used by the pycurl transfer below;
        # kept for parity with the sibling implementations in this file.
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            # The GET and fall-through branches were identical and POST only
            # added two options, so the three copies are merged into one setup.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            if(httpmethod=="POST"):
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # Status line, e.g. "HTTP/2 200" -> (version, code, reason).
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the dict so plain str keys are used.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        # Copy the buffered body in buffersize chunks, logging progress.
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding we recognize; on a decode
        # failure the raw body is returned unchanged.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was "except zstandard.error" — wrong module.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (IOError, OSError, ValueError):
                # BUGFIX: was "except zstandard.error"; bz2 raises
                # IOError/OSError (bad data) or ValueError instead.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"};
        geturls_text.close();
        return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # pycurl is unavailable; fall back to the urllib implementation.
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # CONSISTENCY FIX: pycurl is available, only HTTP/2 support is missing,
        # so fall back to the HTTP/1.x pycurl implementation (the file-variant
        # fallback below does the same) instead of dropping all the way to urllib.
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        '''
        Download httpurl via download_from_url_with_pycurl2 (HTTP/2) and spool
        the body into a uniquely-named temporary file. Returns a result dict
        whose 'Filename' points at that file (caller deletes it), or False
        when the underlying download failed.
        '''
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: apply the Last-Modified mtime AFTER writing the file; the
        # original called os.utime() before f.write(), so the write clobbered it.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fall back to strptime when parsedate_to_datetime is unavailable.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start; the original produced negatives.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # pycurl is unavailable; fall back to the urllib implementation.
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # pycurl is present but lacks HTTP/2 support; use the HTTP/1.x
        # pycurl implementation instead.
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via pycurl (HTTP/2) and store it at outpath/outfile.

        When outfile is "-" the content is returned in memory instead of
        being written to disk.  Returns a result dict on success, False on
        failure.  buffersize is [download_chunk, copy_chunk].
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            # On-disk mode: download to a temp file, then move into place.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp when parseable.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, which is negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: removed the duplicate 'Method' key (the dict literal listed
            # 'Method' twice; only the final value, httpmethod, ever survived).
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            # In-memory mode: download to a temp file, read it back, delete it.
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: a failed download was previously unchecked here and
            # crashed on pretmpfilename.get(...).
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: end - start, as above.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl is absent: delegate to the urllib implementation."""
        # BUGFIX: the old positional call passed buffersize where the callee
        # expects outfile (and dropped ranges entirely); all the
        # download_from_url_to_file_with_* functions in this file share the
        # parameter order (... postdata, outfile, outpath, ranges, buffersize,
        # sleep, timeout) -- TODO confirm against the urllib definition.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when libcurl lacks HTTP/2: delegate to the plain pycurl implementation."""
        # BUGFIX: the old positional call passed buffersize in the outfile
        # slot and dropped ranges; the callee's declared order is
        # (... postdata, outfile, outpath, ranges, buffersize, sleep, timeout).
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/3 and return a result dict
        ('Content', 'Headers', 'Code', 'Reason', ...), or False on failure.

        Supports GET and POST (any other method is issued as a plain GET-style
        request).  Transparently decompresses gzip/deflate and, when the
        optional modules are present, brotli/zstd/lzma/bzip2 bodies.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update({...}) -- calling .update on a
                # string raises AttributeError; the header dict is the target.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same as above (was httpuseragent.update({...})).
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline URL credentials become an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            # The GET and default branches were three identical setup blocks;
            # they are merged here -- only POST adds extra options.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            if(httpmethod=="POST"):
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # First header line is the status line, e.g. "HTTP/3 200 OK".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the header mapping into a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Decompress the body according to Content-Encoding; corrupt data is
        # passed through untouched (best-effort, matching the other backends).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.ZstdError:
                # BUGFIX: was zstandard.error, which does not exist.
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was zstandard.error -- wrong module for lzma data.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (ValueError, OSError):
                # BUGFIX: was zstandard.error -- bz2.decompress raises
                # ValueError/OSError on invalid streams.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"};
        geturls_text.close();
        return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is absent: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when libcurl lacks HTTP/3 but has HTTP/2: delegate to the HTTP/2 implementation."""
        return download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when libcurl has neither HTTP/3 nor HTTP/2: delegate to the plain pycurl implementation."""
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via pycurl (HTTP/3) into a uniquely-named temporary
        file and return a result dict describing it, or False on failure."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: restore the Last-Modified timestamp AFTER the content is
        # written and the file is closed; previously os.utime() ran first and
        # the subsequent write reset the mtime it had just set.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is absent: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when libcurl lacks HTTP/3 but has HTTP/2: delegate to the HTTP/2 implementation."""
        return download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when libcurl has neither HTTP/3 nor HTTP/2: delegate to the plain pycurl implementation."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via pycurl (HTTP/3) and store it at outpath/outfile.

        When outfile is "-" the content is returned in memory instead of
        being written to disk.  Returns a result dict on success, False on
        failure.  buffersize is [download_chunk, copy_chunk].
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            # On-disk mode: download to a temp file, then move into place.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp when parseable.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: removed the duplicate 'Method' key (only the final value,
            # httpmethod, ever survived in the original dict literal).
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            # In-memory mode: download to a temp file, read it back, delete it.
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: a failed download was previously unchecked here and
            # crashed on pretmpfilename.get(...).
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: end - start, as above.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl is absent: delegate to the urllib implementation."""
        # BUGFIX: the old positional call passed buffersize in the outfile slot
        # and dropped ranges; the download_from_url_to_file_with_* family's
        # shared parameter order is (... postdata, outfile, outpath, ranges,
        # buffersize, sleep, timeout) -- TODO confirm against the urllib definition.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # BUGFIX: this fallback was defined under the pycurl2 name and called
    # itself (infinite recursion), while leaving download_from_url_to_file_with_pycurl3
    # undefined for this configuration.  Following the pattern used by every
    # other *_pycurl3 fallback in this file, it defines the pycurl3 name and
    # delegates to the HTTP/2 implementation.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when libcurl lacks HTTP/3 but has HTTP/2: delegate to the HTTP/2 implementation."""
        # BUGFIX: arguments are now passed in the callee's declared order
        # (outfile/outpath before ranges/buffersize); the old positional call
        # shifted buffersize into the outfile slot and dropped timeout.
        return download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    # BUGFIX: this fallback was defined under the plain pycurl name and called
    # itself (infinite recursion), shadowing the real implementation.
    # Following the pattern of the other fallbacks, it defines the pycurl3
    # name and delegates to the plain pycurl implementation.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when libcurl has neither HTTP/3 nor HTTP/2: delegate to the plain pycurl implementation."""
        # BUGFIX: arguments are now passed in the callee's declared order; the
        # old positional call shifted buffersize into the outfile slot.
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
def download_file_from_ftp_file(url):
    """Fetch url (ftp:// or ftps://) and return its content as a BytesIO
    object positioned at the start, or False on failure.

    Missing credentials fall back to anonymous login; a missing port falls
    back to 21.  FTPS connections are upgraded to a protected data channel.
    """
    urlparts = urlparse.urlparse(url);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        # Any non-FTP scheme (including http/https) is rejected here.
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # BUGFIX: the log message referenced the undefined name httpurl,
        # turning any connection failure into a NameError.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    # BUGFIX: log in with the computed credentials so the anonymous defaults
    # above actually apply (was ftp.login(urlparts.username, urlparts.password),
    # which passed None for credential-less URLs).
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def download_file_from_ftp_string(url):
    """Fetch url via FTP/FTPS and return its content as bytes, or False on failure."""
    ftpfile = download_file_from_ftp_file(url);
    # BUGFIX: a failed download returns False, which previously crashed with
    # AttributeError on False.read(); propagate the failure instead.
    if(not ftpfile):
        return False;
    return ftpfile.read();
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl over FTP/FTPS and return a result dict ('Content',
    'Contentsize', ...), or False on failure.

    The HTTP-style parameters (headers, user agent, referer, cookies, method,
    postdata) are accepted for interface parity with the HTTP backends; FTP
    itself does not transmit them.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update({...}) -- .update on a string
            # raises AttributeError; the header dict is the target.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above (was httpuseragent.update({...})).
            httpheaders.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_text = download_file_from_ftp_file(httpurl);
    if(not geturls_text):
        return False;
    # FTP provides no Content-Length equivalent here, so no percentage can
    # be reported while copying.
    downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
 # Download an FTP URL to a named temporary file on disk and return a
 # result dict describing that file (Type "File"), or False on failure.
 # NOTE(review): `ranges` is accepted but never used in this function.
 global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
 exec_time_start = time.time();
 # Build a unique temp-file suffix from a SHA-1 of URL + buffersize + start time.
 myhash = hashlib.new("sha1");
 if(sys.version[0]=="2"):
  # Python 2: the hash accepts byte strings directly.
  myhash.update(httpurl);
  myhash.update(str(buffersize));
  myhash.update(str(exec_time_start));
 if(sys.version[0]>="3"):
  # Python 3: hash input must be encoded to bytes first.
  myhash.update(httpurl.encode('utf-8'));
  myhash.update(str(buffersize).encode('utf-8'));
  myhash.update(str(exec_time_start).encode('utf-8'));
 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 # Fetch the whole resource into memory first (a Type "Content" dict).
 pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
 if(not pretmpfilename):
  return False;
 with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
  tmpfilename = f.name;
  try:
   # Try to stamp the temp file with the server's Last-Modified time.
   # NOTE(review): this runs before f.write() below, so the write will
   # reset the mtime again and this utime has no lasting effect.
   os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
  except AttributeError:
   try:
    # Fallback parse for environments without parsedate_to_datetime.
    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
   except ValueError:
    pass;
  except ValueError:
   pass;
  returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
  f.write(pretmpfilename.get('Content'));
  f.close();
 exec_time_end = time.time();
 # NOTE(review): start - end yields a negative duration; the same
 # pattern is used file-wide, presumably hms_string() tolerates it.
 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
 returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
 return returnval;
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
 # Download an FTP URL to a final destination. With outfile!="-" the
 # temp download is moved to outpath/outfile and a "File" dict is
 # returned; with outfile=="-" the data is read back into memory and a
 # "Content" dict is returned instead. Returns False on failure.
 # buffersize is a pair: [download chunk size, copy-back chunk size].
 global geturls_download_sleep, havezstd, havebrotli;
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(timeout<=0):
  timeout = 10;
 if(not outfile=="-"):
  # File-output mode: validate the target path, download, then move.
  outpath = outpath.rstrip(os.path.sep);
  filepath = os.path.realpath(outpath+os.path.sep+outfile);
  if(not os.path.exists(outpath)):
   os.makedirs(outpath);
  if(os.path.exists(outpath) and os.path.isfile(outpath)):
   # Output directory path exists but is a regular file.
   return False;
  if(os.path.exists(filepath) and os.path.isdir(filepath)):
   # Output file path exists but is a directory.
   return False;
  pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
  if(not pretmpfilename):
   return False;
  tmpfilename = pretmpfilename.get('Filename');
  downloadsize = int(os.path.getsize(tmpfilename));
  fulldatasize = 0;
  log.info("Moving file "+tmpfilename+" to "+filepath);
  exec_time_start = time.time();
  shutil.move(tmpfilename, filepath);
  exec_time_end = time.time();
  # NOTE(review): start - end is negative; same pattern file-wide.
  log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
  if(os.path.exists(tmpfilename)):
   os.remove(tmpfilename);
  # NOTE(review): 'Method' appears twice in this literal; the later
  # 'Method': None wins, so the real method value is discarded.
  returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
 if(outfile=="-"):
  # In-memory mode: download to a temp file, stream it back into a
  # BytesIO buffer with progress logging, then delete the temp file.
  pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
  tmpfilename = pretmpfilename.get('Filename');
  downloadsize = int(os.path.getsize(tmpfilename));
  fulldatasize = 0;
  prevdownsize = 0;
  exec_time_start = time.time();
  with open(tmpfilename, 'rb') as ft:
   f = BytesIO();
   while True:
    databytes = ft.read(buffersize[1]);
    if not databytes: break;
    datasize = len(databytes);
    fulldatasize = datasize + fulldatasize;
    percentage = "";
    if(downloadsize>0):
     percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
    downloaddiff = fulldatasize - prevdownsize;
    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
    prevdownsize = fulldatasize;
    f.write(databytes);
   f.seek(0);
   fdata = f.getvalue();
   f.close();
   ft.close();
  os.remove(tmpfilename);
  exec_time_end = time.time();
  log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
  # NOTE(review): duplicate 'Method' key here as well; None wins.
  returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
 return returnval;
def upload_file_to_ftp_file(ftpfile, url):
 # Upload the file-like object `ftpfile` to the path in an ftp:// or
 # ftps:// URL. Returns the rewound file object on success, False on an
 # unsupported scheme or connection error.
 urlparts = urlparse.urlparse(url);
 file_name = os.path.basename(urlparts.path);
 file_dir = os.path.dirname(urlparts.path);
 # Default to anonymous credentials when the URL carries none.
 if(urlparts.username is not None):
  ftp_username = urlparts.username;
 else:
  ftp_username = "anonymous";
 if(urlparts.password is not None):
  ftp_password = urlparts.password;
 elif(urlparts.password is None and urlparts.username=="anonymous"):
  ftp_password = "anonymous";
 else:
  ftp_password = "";
 if(urlparts.scheme=="ftp"):
  ftp = FTP();
 elif(urlparts.scheme=="ftps"):
  ftp = FTP_TLS();
 else:
  return False;
 if(urlparts.scheme=="http" or urlparts.scheme=="https"):
  return False;
 ftp_port = urlparts.port;
 if(urlparts.port is None):
  ftp_port = 21;
 try:
  ftp.connect(urlparts.hostname, ftp_port);
 except socket.gaierror:
  # BUGFIX: previously logged the undefined name `httpurl` (NameError
  # raised inside the except handler, masking the real failure).
  log.info("Error With URL "+url);
  return False;
 except socket.timeout:
  log.info("Error With URL "+url);
  return False;
 # BUGFIX: log in with the computed anonymous-default credentials;
 # they were previously built but never used (raw URL values, possibly
 # None, were passed instead).
 ftp.login(ftp_username, ftp_password);
 if(urlparts.scheme=="ftps"):
  # Secure the data channel on FTPS connections.
  ftp.prot_p();
 ftp.storbinary("STOR "+urlparts.path, ftpfile);
 ftp.close();
 ftpfile.seek(0, 0);
 return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
 # Wrap raw bytes in an in-memory buffer and hand them to
 # upload_file_to_ftp_file(); passes through that function's result.
 buffered = BytesIO(ftpstring);
 uploaded = upload_file_to_ftp_file(buffered, url);
 buffered.close();
 return uploaded;
if(haveparamiko):
 def download_file_from_sftp_file(url):
  # Fetch a file from an sftp:// URL into an in-memory BytesIO buffer.
  # Returns the rewound buffer, or False on bad scheme / connect error.
  urlparts = urlparse.urlparse(url);
  file_name = os.path.basename(urlparts.path);
  file_dir = os.path.dirname(urlparts.path);
  if(urlparts.scheme=="http" or urlparts.scheme=="https"):
   return False;
  sftp_port = urlparts.port;
  if(urlparts.port is None):
   sftp_port = 22;
  else:
   sftp_port = urlparts.port;
  # NOTE(review): these anonymous-default credentials are computed but
  # unused below; ssh.connect() receives the raw URL values (None when
  # absent, which paramiko resolves to the local user) — confirm intent.
  if(urlparts.username is not None):
   sftp_username = urlparts.username;
  else:
   sftp_username = "anonymous";
  if(urlparts.password is not None):
   sftp_password = urlparts.password;
  elif(urlparts.password is None and urlparts.username=="anonymous"):
   sftp_password = "anonymous";
  else:
   sftp_password = "";
  if(urlparts.scheme!="sftp"):
   return False;
  ssh = paramiko.SSHClient();
  ssh.load_system_host_keys();
  ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
  try:
   ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
  except paramiko.ssh_exception.SSHException:
   return False;
  except socket.gaierror:
   # BUGFIX: was logging the undefined name `httpurl` (NameError).
   log.info("Error With URL "+url);
   return False;
  except socket.timeout:
   log.info("Error With URL "+url);
   return False;
  sftp = ssh.open_sftp();
  sftpfile = BytesIO();
  sftp.getfo(urlparts.path, sftpfile);
  sftp.close();
  ssh.close();
  sftpfile.seek(0, 0);
  return sftpfile;
else:
 def download_file_from_sftp_file(url):
  # paramiko unavailable: SFTP downloads are unsupported.
  return False;
if(haveparamiko):
 def download_file_from_sftp_string(url):
  # Download an sftp:// URL and return its contents as bytes, or False
  # when the underlying download failed.
  sftpfile = download_file_from_sftp_file(url);
  if(not sftpfile):
   # BUGFIX: previously called .read() on False (AttributeError) when
   # the download failed; propagate False per the file's convention.
   return False;
  return sftpfile.read();
else:
 # BUGFIX: the fallback was mis-named download_file_from_ftp_string,
 # which left download_file_from_sftp_string undefined (NameError)
 # whenever paramiko is not installed.
 def download_file_from_sftp_string(url):
  return False;
if(haveparamiko):
 def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
  # Download an sftp:// URL fully into memory and return a "Content"
  # result dict, or False on failure.
  global geturls_download_sleep, havezstd, havebrotli;
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(timeout<=0):
   timeout = 10;
  urlparts = urlparse.urlparse(httpurl);
  # Normalize the header collection and fold in useragent/referer.
  if(isinstance(httpheaders, list)):
   httpheaders = make_http_headers_from_list_to_dict(httpheaders);
  httpheaders = fix_header_names(httpheaders);
  if(httpuseragent is not None):
   if('User-Agent' in httpheaders):
    httpheaders['User-Agent'] = httpuseragent;
   else:
    # BUGFIX: was httpuseragent.update({...}) — an AttributeError on a
    # string; the header dict is what must be updated.
    httpheaders.update({'User-Agent': httpuseragent});
  if(httpreferer is not None):
   if('Referer' in httpheaders):
    httpheaders['Referer'] = httpreferer;
   else:
    # BUGFIX: same defect as above — update the header dict, not the
    # useragent string.
    httpheaders.update({'Referer': httpreferer});
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  time.sleep(sleep);
  geturls_text = download_file_from_sftp_file(httpurl);
  if(not geturls_text):
   return False;
  # Size is unknown for this transport; treat as 0 (skips percentages).
  downloadsize = None;
  if(downloadsize is not None):
   downloadsize = int(downloadsize);
  if downloadsize is None: downloadsize = 0;
  fulldatasize = 0;
  prevdownsize = 0;
  log.info("Downloading URL "+httpurl);
  with BytesIO() as strbuf:
   while True:
    databytes = geturls_text.read(buffersize);
    if not databytes: break;
    datasize = len(databytes);
    fulldatasize = datasize + fulldatasize;
    percentage = "";
    if(downloadsize>0):
     percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
    downloaddiff = fulldatasize - prevdownsize;
    log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
    prevdownsize = fulldatasize;
    strbuf.write(databytes);
   strbuf.seek(0);
   returnval_content = strbuf.read();
  returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
  geturls_text.close();
  return returnval;
if(not haveparamiko):
 def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
  # Stub installed when paramiko is missing: SFTP downloads always fail.
  return False;
if(haveparamiko):
 def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
  # Download an sftp:// URL to a named temporary file and return a
  # "File" result dict, or False on failure.
  # NOTE(review): `ranges` is accepted but never used in this function.
  global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
  exec_time_start = time.time();
  # Unique temp-file suffix: SHA-1 of URL + buffersize + start time.
  myhash = hashlib.new("sha1");
  if(sys.version[0]=="2"):
   # Python 2: the hash accepts byte strings directly.
   myhash.update(httpurl);
   myhash.update(str(buffersize));
   myhash.update(str(exec_time_start));
  if(sys.version[0]>="3"):
   # Python 3: hash input must be encoded to bytes first.
   myhash.update(httpurl.encode('utf-8'));
   myhash.update(str(buffersize).encode('utf-8'));
   myhash.update(str(exec_time_start).encode('utf-8'));
  newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(timeout<=0):
   timeout = 10;
  # Fetch the whole resource into memory first (a Type "Content" dict).
  pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
  if(not pretmpfilename):
   return False;
  with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
   tmpfilename = f.name;
   try:
    # Stamp the temp file with the Last-Modified header time if present.
    # NOTE(review): runs before f.write(), which resets the mtime again.
    os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
   except AttributeError:
    try:
     # Fallback parse for environments without parsedate_to_datetime.
     os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
    except ValueError:
     pass;
   except ValueError:
    pass;
   returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
   f.write(pretmpfilename.get('Content'));
   f.close();
  exec_time_end = time.time();
  # NOTE(review): start - end is negative; same pattern file-wide.
  log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
  returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
  return returnval;
if(not haveparamiko):
 def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
  # Stub installed when paramiko is missing: SFTP downloads always fail.
  return False;
if(haveparamiko):
 def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
  # Download an sftp:// URL to a final destination. With outfile!="-"
  # the temp download is moved to outpath/outfile ("File" dict); with
  # outfile=="-" the data is read back into memory ("Content" dict).
  # Returns False on failure. buffersize is [download, copy-back] sizes.
  global geturls_download_sleep, havezstd, havebrotli;
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(timeout<=0):
   timeout = 10;
  if(not outfile=="-"):
   # File-output mode: validate the target path, download, then move.
   outpath = outpath.rstrip(os.path.sep);
   filepath = os.path.realpath(outpath+os.path.sep+outfile);
   if(not os.path.exists(outpath)):
    os.makedirs(outpath);
   if(os.path.exists(outpath) and os.path.isfile(outpath)):
    # Output directory path exists but is a regular file.
    return False;
   if(os.path.exists(filepath) and os.path.isdir(filepath)):
    # Output file path exists but is a directory.
    return False;
   pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
   if(not pretmpfilename):
    return False;
   tmpfilename = pretmpfilename.get('Filename');
   downloadsize = int(os.path.getsize(tmpfilename));
   fulldatasize = 0;
   log.info("Moving file "+tmpfilename+" to "+filepath);
   exec_time_start = time.time();
   shutil.move(tmpfilename, filepath);
   exec_time_end = time.time();
   # NOTE(review): start - end is negative; same pattern file-wide.
   log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
   if(os.path.exists(tmpfilename)):
    os.remove(tmpfilename);
   # NOTE(review): 'Method' appears twice in this literal; the later
   # 'Method': None wins, so the real method value is discarded.
   returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
  if(outfile=="-"):
   # In-memory mode: download to a temp file, stream it back into a
   # BytesIO buffer with progress logging, then delete the temp file.
   pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
   tmpfilename = pretmpfilename.get('Filename');
   downloadsize = int(os.path.getsize(tmpfilename));
   fulldatasize = 0;
   prevdownsize = 0;
   exec_time_start = time.time();
   with open(tmpfilename, 'rb') as ft:
    f = BytesIO();
    while True:
     databytes = ft.read(buffersize[1]);
     if not databytes: break;
     datasize = len(databytes);
     fulldatasize = datasize + fulldatasize;
     percentage = "";
     if(downloadsize>0):
      percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
     downloaddiff = fulldatasize - prevdownsize;
     log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
     prevdownsize = fulldatasize;
     f.write(databytes);
    f.seek(0);
    fdata = f.getvalue();
    f.close();
    ft.close();
   os.remove(tmpfilename);
   exec_time_end = time.time();
   log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
   # NOTE(review): duplicate 'Method' key here as well; None wins.
   returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
  return returnval;
if(not haveparamiko):
 def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
  # Stub installed when paramiko is missing: SFTP downloads always fail.
  return False;
if(haveparamiko):
 def upload_file_to_sftp_file(sftpfile, url):
  # Upload a file-like object to the path in an sftp:// URL. Returns
  # the rewound file object, or False on bad scheme / connect failure.
  urlparts = urlparse.urlparse(url);
  file_name = os.path.basename(urlparts.path);
  file_dir = os.path.dirname(urlparts.path);
  sftp_port = urlparts.port;
  if(urlparts.scheme=="http" or urlparts.scheme=="https"):
   return False;
  if(urlparts.port is None):
   sftp_port = 22;
  else:
   sftp_port = urlparts.port;
  # NOTE(review): these anonymous-default credentials are computed but
  # unused below; ssh.connect() receives the raw URL values — confirm
  # intent before wiring them in.
  if(urlparts.username is not None):
   sftp_username = urlparts.username;
  else:
   sftp_username = "anonymous";
  if(urlparts.password is not None):
   sftp_password = urlparts.password;
  elif(urlparts.password is None and urlparts.username=="anonymous"):
   sftp_password = "anonymous";
  else:
   sftp_password = "";
  if(urlparts.scheme!="sftp"):
   return False;
  ssh = paramiko.SSHClient();
  ssh.load_system_host_keys();
  ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
  try:
   ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
  except paramiko.ssh_exception.SSHException:
   return False;
  except socket.gaierror:
   # BUGFIX: was logging the undefined name `httpurl` (NameError).
   log.info("Error With URL "+url);
   return False;
  except socket.timeout:
   log.info("Error With URL "+url);
   return False;
  sftp = ssh.open_sftp();
  sftp.putfo(sftpfile, urlparts.path);
  sftp.close();
  ssh.close();
  sftpfile.seek(0, 0);
  return sftpfile;
else:
 def upload_file_to_sftp_file(sftpfile, url):
  # paramiko unavailable: SFTP uploads are unsupported.
  return False;
if(haveparamiko):
 def upload_file_to_sftp_string(sftpstring, url):
  # Upload raw bytes to an sftp:// URL via upload_file_to_sftp_file();
  # passes through that function's result.
  sftpfileo = BytesIO(sftpstring);
  # BUGFIX: previously called the nonexistent upload_file_to_sftp_files()
  # with the undefined name `ftpfileo` — two NameErrors at call time.
  sftpfile = upload_file_to_sftp_file(sftpfileo, url);
  sftpfileo.close();
  return sftpfile;
else:
 # BUGFIX: fallback signature now matches the real implementation
 # (it previously took only `url`, breaking callers without paramiko).
 def upload_file_to_sftp_string(sftpstring, url):
  return False;
if(havepysftp):
 def download_file_from_pysftp_file(url):
  # Fetch a file from an sftp:// URL via pysftp into a BytesIO buffer.
  # Returns the rewound buffer, or False on bad scheme / connect error.
  urlparts = urlparse.urlparse(url);
  file_name = os.path.basename(urlparts.path);
  file_dir = os.path.dirname(urlparts.path);
  if(urlparts.scheme=="http" or urlparts.scheme=="https"):
   return False;
  sftp_port = urlparts.port;
  if(urlparts.port is None):
   sftp_port = 22;
  else:
   sftp_port = urlparts.port;
  # NOTE(review): these anonymous-default credentials are computed but
  # unused by the Connection() call below — confirm intent.
  if(urlparts.username is not None):
   sftp_username = urlparts.username;
  else:
   sftp_username = "anonymous";
  if(urlparts.password is not None):
   sftp_password = urlparts.password;
  elif(urlparts.password is None and urlparts.username=="anonymous"):
   sftp_password = "anonymous";
  else:
   sftp_password = "";
  if(urlparts.scheme!="sftp"):
   return False;
  try:
   # BUGFIX: the Connection object was previously discarded and the
   # code then used an undefined `ssh` object (NameError); keep the
   # handle and use it directly.
   sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
  except paramiko.ssh_exception.SSHException:
   return False;
  except socket.gaierror:
   # BUGFIX: was logging the undefined name `httpurl` (NameError).
   log.info("Error With URL "+url);
   return False;
  except socket.timeout:
   log.info("Error With URL "+url);
   return False;
  sftpfile = BytesIO();
  sftp.getfo(urlparts.path, sftpfile);
  sftp.close();
  sftpfile.seek(0, 0);
  return sftpfile;
else:
 def download_file_from_pysftp_file(url):
  # pysftp unavailable: SFTP downloads are unsupported.
  return False;
if(havepysftp):
 def download_file_from_pysftp_string(url):
  # Download an sftp:// URL via pysftp and return its contents as
  # bytes, or False when the underlying download failed.
  sftpfile = download_file_from_pysftp_file(url);
  if(not sftpfile):
   # BUGFIX: previously called .read() on False (AttributeError) when
   # the download failed; propagate False per the file's convention.
   return False;
  return sftpfile.read();
else:
 # BUGFIX: the fallback was mis-named download_file_from_ftp_string,
 # which left download_file_from_pysftp_string undefined (NameError)
 # whenever pysftp is not installed.
 def download_file_from_pysftp_string(url):
  return False;
if(havepysftp):
 def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
  # Download an sftp:// URL via pysftp fully into memory and return a
  # "Content" result dict, or False on failure. Unlike the paramiko
  # variant, there are no httpuseragent/httpreferer parameters here.
  global geturls_download_sleep, havezstd, havebrotli;
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(timeout<=0):
   timeout = 10;
  urlparts = urlparse.urlparse(httpurl);
  # Normalize the header collection list->dict->fixed-names->list.
  # NOTE(review): the normalized headers are not used after this point
  # in the visible code; this mirrors the HTTP code paths.
  if(isinstance(httpheaders, list)):
   httpheaders = make_http_headers_from_list_to_dict(httpheaders);
  httpheaders = fix_header_names(httpheaders);
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  time.sleep(sleep);
  geturls_text = download_file_from_pysftp_file(httpurl);
  if(not geturls_text):
   return False;
  # Size is unknown for this transport; treat as 0 (skips percentages).
  downloadsize = None;
  if(downloadsize is not None):
   downloadsize = int(downloadsize);
  if downloadsize is None: downloadsize = 0;
  fulldatasize = 0;
  prevdownsize = 0;
  log.info("Downloading URL "+httpurl);
  with BytesIO() as strbuf:
   while True:
    databytes = geturls_text.read(buffersize);
    if not databytes: break;
    datasize = len(databytes);
    fulldatasize = datasize + fulldatasize;
    percentage = "";
    if(downloadsize>0):
     percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
    downloaddiff = fulldatasize - prevdownsize;
    log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
    prevdownsize = fulldatasize;
    strbuf.write(databytes);
   strbuf.seek(0);
   returnval_content = strbuf.read();
  returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
  geturls_text.close();
  return returnval;
if(not havepysftp):
 def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
  # Stub installed when pysftp is missing: these downloads always fail.
  return False;
if(havepysftp):
 def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
  # Download an sftp:// URL (via pysftp) to a named temporary file and
  # return a "File" result dict, or False on failure.
  # NOTE(review): `ranges` is accepted but never used in this function.
  global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
  exec_time_start = time.time();
  # Unique temp-file suffix: SHA-1 of URL + buffersize + start time.
  myhash = hashlib.new("sha1");
  if(sys.version[0]=="2"):
   # Python 2: the hash accepts byte strings directly.
   myhash.update(httpurl);
   myhash.update(str(buffersize));
   myhash.update(str(exec_time_start));
  if(sys.version[0]>="3"):
   # Python 3: hash input must be encoded to bytes first.
   myhash.update(httpurl.encode('utf-8'));
   myhash.update(str(buffersize).encode('utf-8'));
   myhash.update(str(exec_time_start).encode('utf-8'));
  newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(timeout<=0):
   timeout = 10;
  # BUGFIX: the call previously passed the undefined names
  # `httpuseragent` and `httpreferer` (this signature has neither, and
  # neither does download_from_url_with_pysftp) — a guaranteed NameError.
  pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
  if(not pretmpfilename):
   return False;
  with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
   tmpfilename = f.name;
   try:
    # Stamp the temp file with the Last-Modified header time if present.
    # NOTE(review): runs before f.write(), which resets the mtime again.
    os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
   except AttributeError:
    try:
     # Fallback parse for environments without parsedate_to_datetime.
     os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
    except ValueError:
     pass;
   except ValueError:
    pass;
   returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
   f.write(pretmpfilename.get('Content'));
   f.close();
  exec_time_end = time.time();
  # NOTE(review): start - end is negative; same pattern file-wide.
  log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
  returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
  return returnval;
if(not havepysftp):
 def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
  # Stub installed when pysftp is missing: these downloads always fail.
  return False;
5410 if(havepysftp):
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    '''
    Download httpurl with the pysftp backend and deliver the result either as a
    file on disk or as an in-memory byte string.

    When outfile is a name, the temporary download is moved to
    outpath/outfile and a dict of Type "File" is returned.  When outfile is
    "-", the download is copied into memory and a dict of Type "Content"
    (with the raw bytes under 'Content') is returned.  Returns False when the
    download fails or the target path is unusable.

    httpheaders/httpcookie/httpmethod/postdata are accepted for signature
    compatibility with the other download_from_url_to_file_with_* backends;
    ranges, buffersize (read chunk sizes), sleep and timeout are forwarded to
    download_from_url_file_with_pysftp.
    '''
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # File mode: validate/prepare the destination path first.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath exists but is a plain file; cannot place outfile inside it.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination already exists as a directory.
            return False;
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, a negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            # shutil.move normally removes the source; clean up if it remains.
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice, so the second
        # entry ('Method': None) silently discarded the real method value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # Memory mode: download to a temp file, then copy it into a BytesIO
        # buffer in buffersize[1]-sized chunks with progress logging.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, a negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well (see File branch).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if not havepysftp:
    # Fallback stub: the pysftp module is unavailable, so this backend cannot
    # download anything.  It keeps the exact signature of the real
    # implementation and simply reports failure, letting callers fall through
    # to another backend.
    def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        return False
if(havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        '''
        Upload the file-like object sftpfile to the path named by an sftp:// URL
        using pysftp.  On success the file object is rewound and returned so it
        can be reused; on any failure False is returned.

        Username defaults to "anonymous" and the password falls back to
        "anonymous"/"" when missing, matching the FTP upload helpers in this file.
        '''
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        if(urlparts.scheme!="sftp"):
            # Only sftp:// URLs are handled here.
            return False;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        try:
            # BUGFIX: the Connection object was created but discarded, and the
            # upload then used an undefined name `ssh`.  pysftp.Connection IS the
            # SFTP session, so keep it and use it directly.  Also pass the
            # computed credentials (with their anonymous fallbacks) instead of
            # the raw, possibly-None urlparts values.
            sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            # BUGFIX: log the `url` argument (httpurl was undefined here).
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp.putfo(sftpfile, urlparts.path);
        sftp.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def upload_file_to_pysftp_file(sftpfile, url):
        # pysftp is unavailable; uploads via this backend always fail.
        return False;
if(havepysftp):
    def upload_file_to_pysftp_string(sftpstring, url):
        '''
        Upload the byte string sftpstring to an sftp:// URL by wrapping it in a
        BytesIO and delegating to upload_file_to_pysftp_file.  Returns whatever
        the delegate returns (the rewound file object on success, False on
        failure).
        '''
        sftpfileo = BytesIO(sftpstring);
        # BUGFIX: was calling nonexistent upload_file_to_pysftp_files with the
        # undefined name ftpfileo; both were typos.
        sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
        sftpfileo.close();
        return sftpfile;
else:
    # BUGFIX: the stub previously dropped the sftpstring parameter, so callers
    # using the real two-argument signature would raise TypeError instead of
    # getting the intended False.
    def upload_file_to_pysftp_string(sftpstring, url):
        return False;