Update pywwwget.py
[PyWWW-Get.git] / pywwwgetold.py
blobb0f00288fd3dea9483ae72c7ff5044ac76381b6c
1 #!/usr/bin/env python
3 '''
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
16 '''
18 from __future__ import division, absolute_import, print_function;
19 import re, os, sys, hashlib, shutil, platform, tempfile, urllib, zlib, time, argparse, cgi, subprocess, socket, email.utils, datetime, time;
20 import logging as log;
21 from ftplib import FTP, FTP_TLS;
22 from base64 import b64encode;
23 haverequests = False;
24 try:
25 import requests;
26 haverequests = True;
27 except ImportError:
28 haverequests = False;
29 havemechanize = False;
30 try:
31 import mechanize;
32 havemechanize = True;
33 except ImportError:
34 havemechanize = False;
35 havepycurl = False;
36 try:
37 import pycurl;
38 havepycurl = True;
39 except ImportError:
40 havepycurl = False;
41 haveparamiko = False;
42 try:
43 import paramiko;
44 haveparamiko = True;
45 except ImportError:
46 haveparamiko = False;
47 havepysftp = False;
48 try:
49 import pysftp;
50 havepysftp = True;
51 except ImportError:
52 havepysftp = False;
53 haveurllib3 = False;
54 try:
55 import urllib3;
56 haveurllib3 = True;
57 except ImportError:
58 haveurllib3 = False;
59 havehttplib2 = False;
60 try:
61 import httplib2;
62 from httplib2 import HTTPConnectionWithTimeout, HTTPSConnectionWithTimeout;
63 havehttplib2 = True;
64 except ImportError:
65 havehttplib2 = False;
66 havehttpx = False;
67 try:
68 import httpx;
69 havehttpx = True;
70 except ImportError:
71 havehttpx = False;
72 havehttpcore = False;
73 try:
74 import httpcore;
75 havehttpcore = True;
76 except ImportError:
77 havehttpcore = False;
78 haveaiohttp = False;
79 try:
80 import aiohttp;
81 haveaiohttp = True;
82 except ImportError:
83 haveaiohttp = False;
84 havebrotli = False;
85 try:
86 import brotli;
87 havebrotli = True;
88 except ImportError:
89 havebrotli = False;
90 havezstd = False;
91 try:
92 import zstandard;
93 havezstd = True;
94 except ImportError:
95 havezstd = False;
96 havelzma = False;
97 try:
98 import lzma;
99 havelzma = True;
100 except ImportError:
101 havelzma = False;
102 if(sys.version[0]=="2"):
103 try:
104 from io import StringIO, BytesIO;
105 except ImportError:
106 try:
107 from cStringIO import StringIO;
108 from cStringIO import StringIO as BytesIO;
109 except ImportError:
110 from StringIO import StringIO;
111 from StringIO import StringIO as BytesIO;
112 # From http://python-future.org/compatible_idioms.html
113 from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
114 from urllib import urlencode;
115 from urllib import urlopen as urlopenalt;
116 from urllib2 import urlopen, Request, install_opener, HTTPError, URLError, build_opener, HTTPCookieProcessor;
117 import urlparse, cookielib;
118 from httplib import HTTPConnection, HTTPSConnection;
119 if(sys.version[0]>="3"):
120 from io import StringIO, BytesIO;
121 # From http://python-future.org/compatible_idioms.html
122 from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
123 from urllib.request import urlopen, Request, install_opener, build_opener, HTTPCookieProcessor;
124 from urllib.error import HTTPError, URLError;
125 import urllib.parse as urlparse;
126 import http.cookiejar as cookielib;
127 from http.client import HTTPConnection, HTTPSConnection;
# Program identity and version metadata.
__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
# (major, minor, micro, pre-release tag, pre-release number)
__version_info__ = (2, 0, 2, "RC 1", 1);
__version_date_info__ = (2023, 10, 5, "RC 1", 1);
# Date stamp formatted as YYYY.MM.DD with zero-padded month/day.
__version_date__ = "{0}.{1:02d}.{2:02d}".format(__version_date_info__[0], __version_date_info__[1], __version_date_info__[2]);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
if(__version_info__[4] is not None):
    # Append the pre-release number to the date stamp when one is set.
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
else:
    __version_date_plusrc__ = __version_date__;
if(__version_info__[3] is not None):
    # e.g. "2.0.2 RC 1" when a pre-release tag is present.
    __version__ = "{0}.{1}.{2} {3}".format(__version_info__[0], __version_info__[1], __version_info__[2], __version_info__[3]);
else:
    __version__ = "{0}.{1}.{2}".format(__version_info__[0], __version_info__[1], __version_info__[2]);
# Prefix/suffix used when creating temporary download files.
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
tmpfilesuffix = "-";
pytempdir = tempfile.gettempdir();
# Pointer-size label ("32"/"64") advertised in the Sec-CH-UA-Bitness header.
# BUGFIX: platform.architecture() returns a tuple like ('64bit', 'ELF'), so
# comparing it directly to the strings "32bit"/"64bit" was always False and
# PyBitness silently fell through to "32" on every platform.  Use element [0].
PyBitness = platform.architecture()[0];
if(PyBitness=="32bit" or PyBitness=="32"):
    PyBitness = "32";
elif(PyBitness=="64bit" or PyBitness=="64"):
    PyBitness = "64";
else:
    # Unknown architecture string: keep the conservative default.
    PyBitness = "32";
# Content-Encoding values this module can decode; optional codecs are added
# only when their third-party module imported successfully above.
compression_supported_list = ['identity', 'gzip', 'deflate', 'bzip2'];
if(havebrotli):
    compression_supported_list.append('br');
if(havezstd):
    compression_supported_list.append('zstd');
if(havelzma):
    # The lzma module handles both raw .lzma and .xz containers.
    compression_supported_list.extend(['lzma', 'xz']);
# Comma-separated form used directly as an Accept-Encoding header value.
compression_supported = ', '.join(compression_supported_list);
# Shared cookie jar used as the default cookie store for downloads.
geturls_cj = cookielib.CookieJar();
# Windows User-Agent fragments and their matching Sec-CH-UA client-hint
# headers.  BUGFIX: each dict below originally listed 'SEC-CH-UA-PLATFORM'
# twice, so the platform name ("Windows") was silently overwritten by the
# version number; the version value now uses the proper
# 'SEC-CH-UA-PLATFORM-VERSION' header key.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): "5.1.0" looks like it should be "5.2.0" for XP x64 -- value kept as-is.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Canned browser User-Agent strings (all impersonate a Windows 7 x64 host).
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Honest self-identifying User-Agent strings for this library itself.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
else:
    py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
geturls_ua = geturls_ua_firefox_windows7;
# Default request-header sets matching each User-Agent above.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# NOTE(review): using the program version as SEC-CH-UA-PLATFORM-VERSION below
# looks questionable but the value is kept as-is; only the duplicate key is fixed.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide defaults for downloads.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit dbgtxt via print() or the logging module.

    outtype selects the channel: "print", or one of the logging outputs
    "log" (info), "warning", "error", "critical", "exception", "debug",
    or "logalt" which logs at the numeric level dgblevel.
    Returns True when the message was handled (or dbgenable is False),
    False for an unknown outtype.
    """
    # Debugging disabled: swallow the message but still report success,
    # matching the original contract.
    if not dbgenable:
        return True
    if outtype == "print":
        print(dbgtxt)
        return True
    if outtype == "logalt":
        log.log(dgblevel, dbgtxt)
        return True
    # BUGFIX: the module is imported as "import logging as log" at the top of
    # the file, but the old body referenced the undefined name "logging",
    # raising NameError for every logging outtype.
    handlers = {
        "log": log.info,
        "warning": log.warning,
        "error": log.error,
        "critical": log.critical,
        "exception": log.exception,
        "debug": log.debug,
    }
    if outtype in handlers:
        handlers[outtype](dbgtxt)
        return True
    return False
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Like verbose_printout(), but return the message text when it was
    emitted successfully and False otherwise."""
    emitted = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel)
    return dbgtxt if emitted else False
def add_url_param(url, **params):
    """Return url with the keyword arguments merged into its query string.

    Existing parameters with the same name are overwritten; all other URL
    components (scheme, host, path, fragment) are preserved.
    """
    query_index = 3  # index of the query component in urlsplit()'s 5-tuple
    parts = list(urlparse.urlsplit(url))
    # BUGFIX: use urlparse.parse_qsl instead of cgi.parse_qsl -- the cgi
    # module's copy has been deprecated since Python 2.6 and the whole cgi
    # module was removed in Python 3.13.  (parse_qs would keep list values.)
    query = dict(urlparse.parse_qsl(parts[query_index]))
    query.update(params)
    parts[query_index] = urlencode(query)
    return urlparse.urlunsplit(parts)
# Make the script's own directory and the current working directory
# searchable by which_exec() below.
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], os.path.dirname(os.path.realpath(__file__)), os.getcwd()])
def which_exec(execfile):
    """Search the directories in os.environ["PATH"] for execfile.

    Returns the first matching full path, or None when not found.
    """
    # BUGFIX: the PATH separator was hard-coded to ":" and the path join to
    # "/", both of which break on Windows; use os.pathsep / os.path.join.
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None  # was an implicit fall-through; made explicit
def listize(varlist):
    """Build 1-based forward (index -> item) and reverse (item -> index)
    lookup dicts for varlist and return them under both numeric and
    'reg'/'rev' keys."""
    forward = {}
    backward = {}
    for position, item in enumerate(varlist, 1):
        forward[position] = item
        backward[item] = position
    return {1: forward, 2: backward, 'reg': forward, 'rev': backward}
def twolistize(varlist):
    """Like listize(), but for (name, description) pairs: builds stripped
    forward/reverse lookups for the name column and the description column
    and returns them under numeric and 'name'/'desc' keys."""
    namereg = {}
    namerev = {}
    descreg = {}
    descrev = {}
    for position, pair in enumerate(varlist, 1):
        name = pair[0].strip()
        desc = pair[1].strip()
        namereg[position] = name
        namerev[name] = position
        descreg[position] = desc
        descrev[desc] = position
    names = {1: namereg, 2: namerev, 'reg': namereg, 'rev': namerev}
    descs = {1: descreg, 2: descrev, 'reg': descreg, 'rev': descrev}
    return {1: names, 2: descs, 'name': names, 'desc': descs}
def arglistize(proexec, *varlist):
    """Flatten (flag, value) pairs into an argv-style list starting with
    proexec; None entries in either slot of a pair are skipped."""
    argv = [proexec]
    for pair in varlist:
        for piece in pair[:2]:
            if piece is not None:
                argv.append(piece)
    return argv
def fix_header_names(header_dict):
    """Return a copy of header_dict with every header name title-cased
    (e.g. "content-type" -> "Content-Type"); values are untouched."""
    if sys.version[0] == "2":
        return {name.title(): value for name, value in header_dict.iteritems()}
    return {name.title(): value for name, value in header_dict.items()}
335 # hms_string by ArcGIS Python Recipes
336 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format elapsed seconds as "H:MM:SS.ss" (hours unpadded)."""
    hours, minutes = divmod(int(sec_elapsed) // 60, 60)
    seconds = sec_elapsed % 60.0  # keep fractional seconds
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
343 # get_readable_size by Lipis
344 # http://stackoverflow.com/posts/14998888/revisions
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size dict.

    Returns {'Bytes': original count, 'ReadableWithSuffix': e.g. "1.5 KiB",
    'ReadableWithoutSuffix': "1.5", 'ReadableSuffix': "KiB"}.
    unit selects "IEC" (1024-based) or "SI" (1000-based) suffixes; anything
    else falls back to IEC.  (The parameter name shadows the builtin
    `bytes`; kept for backward compatibility with callers.)
    """
    unit = unit.upper()
    if unit != "IEC" and unit != "SI":
        unit = "IEC"
    if unit == "SI":
        suffixes = [" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB"]
        step = 1000.0
    else:
        suffixes = [" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB"]
        step = 1024.0
    orgbytes = bytes

    def _as_result(value, suffix, fmt):
        # Render, then strip trailing zero/dot noise ("1.0 KiB" -> "1 KiB").
        text = fmt % (value, suffix)
        text = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", text)
        text = re.sub(r"\. ([A-Za-z]+)", r" \1", text)
        pieces = text.split()
        return {'Bytes': orgbytes, 'ReadableWithSuffix': text, 'ReadableWithoutSuffix': pieces[0], 'ReadableSuffix': pieces[1]}

    scaled = bytes
    for suffix in suffixes:
        if abs(scaled) < step:
            return _as_result(scaled, suffix, "%3." + str(precision) + "f%s")
        scaled /= step
    # Beyond the largest listed suffix: report in "YiB" (no leading space,
    # exactly as the original did).
    return _as_result(scaled, "YiB", "%." + str(precision) + "f%s")
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for the file at path infile.

    When usehashes is True, the file is read once and a hex digest is added
    to the result for each comma-separated algorithm in usehashtypes, keyed
    by its upper-cased name (e.g. 'MD5', 'SHA1').
    """
    return_val = get_readable_size(os.path.getsize(infile), precision, unit.upper())
    if usehashes:
        with open(infile, "rb") as openfile:
            filecontents = openfile.read()
        for hashname in usehashtypes.lower().split(","):
            hashname = hashname.strip().upper()
            hasher = hashlib.new(hashname)
            hasher.update(filecontents)
            return_val[hashname] = hasher.hexdigest()
    return return_val
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for len(instring).

    When usehashes is True, a hex digest of the string (UTF-8 encoded on
    Python 3) is added for each comma-separated algorithm in usehashtypes,
    keyed by its upper-cased name.
    """
    return_val = get_readable_size(len(instring), precision, unit.upper())
    if usehashes:
        if sys.version[0] == "2":
            payload = instring
        else:
            payload = instring.encode('utf-8')
        for hashname in usehashtypes.lower().split(","):
            hashname = hashname.strip().upper()
            hasher = hashlib.new(hashname)
            hasher.update(payload)
            return_val[hashname] = hasher.hexdigest()
    return return_val
def http_status_to_reason(code):
    """Map an HTTP status code to its standard reason phrase.

    Returns 'Unknown Status Code' for codes not in the table.
    (Restores the dict's closing brace, which is absent in the source
    as received and left the function syntactically broken.)
    """
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    }
    return reasons.get(code, 'Unknown Status Code')
def ftp_status_to_reason(code):
    """Map an FTP reply code (RFC 959) to its descriptive text.

    Returns 'Unknown Status Code' for codes not in the table.
    (Restores the dict's closing brace, which is absent in the source
    as received and left the function syntactically broken.)
    """
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    }
    return reasons.get(code, 'Unknown Status Code')
def sftp_status_to_reason(code):
    """Map an SFTP (SSH File Transfer Protocol) status code to its
    SSH_FX_* symbolic name.

    Returns 'Unknown Status Code' for codes not in the table.
    (Restores the dict's closing brace, which is absent in the source
    as received and left the function syntactically broken.)
    """
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    }
    return reasons.get(code, 'Unknown Status Code')
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict into a list of (name, value) tuples.

    Lists are passed through unchanged; anything else returns False.
    """
    if isinstance(headers, list):
        return headers
    if not isinstance(headers, dict):
        return False
    if sys.version[0] == "2":
        return [(headkey, headvalue) for headkey, headvalue in headers.iteritems()]
    return [(headkey, headvalue) for headkey, headvalue in headers.items()]
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Render a header dict as pycurl-style "Name: value" strings.

    Lists are passed through unchanged; anything else returns False.
    """
    if isinstance(headers, list):
        return headers
    if not isinstance(headers, dict):
        return False
    if sys.version[0] == "2":
        return [headkey+": "+headvalue for headkey, headvalue in headers.iteritems()]
    return [headkey+": "+headvalue for headkey, headvalue in headers.items()]
def make_http_headers_from_pycurl_to_dict(headers):
    """Parse a raw CRLF-separated response-header blob into a dict with
    title-cased header names; lines without ": " (e.g. the status line)
    are skipped."""
    header_dict = {}
    for line in headers.strip().split('\r\n'):
        name, sep, value = line.partition(': ')
        if sep:
            header_dict[name.title()] = value
    return header_dict
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) header pairs into a dict.

    Dicts are passed through unchanged; anything else returns False.
    """
    if isinstance(headers, dict):
        return headers
    if not isinstance(headers, list):
        return False
    # Later duplicates win, matching the original update() loop.
    return {pair[0]: pair[1] for pair in headers}
def get_httplib_support(checkvalue=None):
    """List the HTTP/FTP/SFTP backend names usable in this environment.

    With checkvalue=None, return the list of supported backend names.
    Otherwise return True/False for whether checkvalue (after normalizing
    the "urllib1"/"urllib2"/"httplib1" aliases) is supported.
    """
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    supported = ["ftp", "httplib"]
    if havehttplib2:
        supported.append("httplib2")
    supported.append("urllib")
    if haveurllib3:
        supported.extend(["urllib3", "request3"])
    supported.append("request")
    if haverequests:
        supported.append("requests")
    if haveaiohttp:
        supported.append("aiohttp")
    if havehttpx:
        supported.extend(["httpx", "httpx2"])
    if havemechanize:
        supported.append("mechanize")
    if havepycurl:
        supported.append("pycurl")
        # HTTP/2 and HTTP/3 variants depend on the installed libcurl build.
        if hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
            supported.append("pycurl2")
        if hasattr(pycurl, "CURL_HTTP_VERSION_3_0"):
            supported.append("pycurl3")
    if haveparamiko:
        supported.append("sftp")
    if havepysftp:
        supported.append("pysftp")
    if checkvalue is not None:
        if checkvalue == "urllib1" or checkvalue == "urllib2":
            checkvalue = "urllib"
        if checkvalue == "httplib1":
            checkvalue = "httplib"
        return checkvalue in supported
    return supported
def check_httplib_support(checkvalue="urllib"):
    """Return True/False for whether the named backend is supported,
    normalizing the "urllib1"/"urllib2"/"httplib1" aliases first."""
    if checkvalue in ("urllib1", "urllib2"):
        checkvalue = "urllib"
    elif checkvalue == "httplib1":
        checkvalue = "httplib"
    return get_httplib_support(checkvalue)
def get_httplib_support_list():
    """Return the full list of supported backend names."""
    return get_httplib_support(None)
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* into memory using the backend named by *httplibuse*.

    Legacy backend aliases are canonicalized and unavailable backends fall
    back to an installed alternative (HTTP backends to "urllib", SFTP
    backends to "ftp").  Returns whatever the selected backend helper
    returns (a result dict on success, False on failure), or False when
    *httplibuse* names no known backend.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    # Canonicalize legacy alias names first.
    if httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request":
        httplibuse = "urllib"
    if httplibuse == "httplib1":
        httplibuse = "httplib"
    # Fall back to an available backend when the requested one is missing.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not haveaiohttp and httplibuse == "aiohttp":
        httplibuse = "urllib"
    if not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2"):
        httplibuse = "urllib"
    if not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3"):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/2 / HTTP/3 requests to what libcurl supports.
    if havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl"
    if havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl2"
    if havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl"
    if not havehttplib2 and httplibuse == "httplib2":
        httplibuse = "httplib"
    if not haveparamiko and httplibuse == "sftp":
        httplibuse = "ftp"
    if not havepysftp and httplibuse == "pysftp":
        httplibuse = "ftp"
    # Dispatch to the backend helper.  NOTE: the former separate "request"
    # branch was unreachable ("request" is normalized to "urllib" above)
    # and has been removed; likewise "request3" is handled on its own and
    # no longer duplicated in the "urllib3" branch.
    if httplibuse == "urllib":
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "request3":
        returnval = download_from_url_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "httplib":
        returnval = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "httplib2":
        returnval = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "urllib3":
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "requests":
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "aiohttp":
        returnval = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "httpx":
        returnval = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "httpx2":
        returnval = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "httpcore":
        returnval = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "httpcore2":
        returnval = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "mechanize":
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "pycurl":
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "pycurl2":
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "pycurl3":
        returnval = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "ftp":
        returnval = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "sftp":
        returnval = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    elif httplibuse == "pysftp":
        returnval = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_from_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in *httpurl* via download_from_url.

    *httpurl* may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs.  Returns the list of per-URL results.
    """
    if isinstance(httpurl, dict):
        # BUGFIX: dict.values() is a non-indexable view on Python 3;
        # materialize it so the URLs can be iterated/indexed.
        httpurl = list(httpurl.values())
    elif not isinstance(httpurl, (list, tuple)):
        # A bare string (or any other scalar) is treated as one URL.
        httpurl = [httpurl]
    returnval = []
    for cururl in httpurl:
        returnval.append(download_from_url(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize, sleep, timeout))
    return returnval
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* to a temporary file using backend *httplibuse*.

    Same alias canonicalization and availability fallbacks as
    download_from_url; dispatches to the download_from_url_file_with_*
    helper for the chosen backend.  Returns that helper's result, or
    False for an unknown backend.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    # Canonicalize legacy alias names first.
    if httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request":
        httplibuse = "urllib"
    if httplibuse == "httplib1":
        httplibuse = "httplib"
    # Fall back to an available backend when the requested one is missing.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not haveaiohttp and httplibuse == "aiohttp":
        httplibuse = "urllib"
    if not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2"):
        httplibuse = "urllib"
    if not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3"):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/2 / HTTP/3 requests to what libcurl supports.
    if havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl"
    if havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl2"
    if havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl"
    if not havehttplib2 and httplibuse == "httplib2":
        httplibuse = "httplib"
    if not haveparamiko and httplibuse == "sftp":
        httplibuse = "ftp"
    # BUGFIX: this previously tested haveparamiko; the pysftp backend
    # depends on the pysftp module (matches the sibling dispatchers).
    if not havepysftp and httplibuse == "pysftp":
        httplibuse = "ftp"
    # Dispatch (the old unreachable "request" branch has been removed:
    # "request" is normalized to "urllib" above).
    if httplibuse == "urllib":
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "request3":
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httplib":
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httplib2":
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "urllib3":
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "requests":
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "aiohttp":
        returnval = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpx":
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpx2":
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpcore":
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpcore2":
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "mechanize":
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pycurl":
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pycurl2":
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pycurl3":
        returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "ftp":
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "sftp":
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pysftp":
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in *httpurl* to a temp file via download_from_url_file.

    *httpurl* may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs.  Returns the list of per-URL results.
    """
    if isinstance(httpurl, dict):
        # BUGFIX: dict.values() is a non-indexable view on Python 3;
        # materialize it so the URLs can be iterated/indexed.
        httpurl = list(httpurl.values())
    elif not isinstance(httpurl, (list, tuple)):
        # A bare string (or any other scalar) is treated as one URL.
        httpurl = [httpurl]
    returnval = []
    for cururl in httpurl:
        returnval.append(download_from_url_file(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize, sleep, timeout))
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download *httpurl* to *outpath*/*outfile* using backend *httplibuse*.

    Same alias canonicalization and availability fallbacks as
    download_from_url; dispatches to the download_from_url_to_file_with_*
    helper for the chosen backend.  Returns that helper's result, or
    False for an unknown backend.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    # Canonicalize legacy alias names first.
    if httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request":
        httplibuse = "urllib"
    if httplibuse == "httplib1":
        httplibuse = "httplib"
    # Fall back to an available backend when the requested one is missing.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not haveaiohttp and httplibuse == "aiohttp":
        httplibuse = "urllib"
    if not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2"):
        httplibuse = "urllib"
    if not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3"):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/2 / HTTP/3 requests to what libcurl supports.
    if havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl"
    if havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl2"
    if havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
        httplibuse = "pycurl"
    if not havehttplib2 and httplibuse == "httplib2":
        httplibuse = "httplib"
    if not haveparamiko and httplibuse == "sftp":
        httplibuse = "ftp"
    if not havepysftp and httplibuse == "pysftp":
        httplibuse = "ftp"
    # Dispatch (the old unreachable "request" branch has been removed:
    # "request" is normalized to "urllib" above).
    if httplibuse == "urllib":
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "request3":
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httplib":
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httplib2":
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "urllib3":
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "requests":
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "aiohttp":
        returnval = download_from_url_to_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpx":
        # BUGFIX: these four branches previously omitted outfile/outpath,
        # shifting every later argument by two positions.
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpx2":
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpcore":
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "httpcore2":
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "mechanize":
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pycurl":
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pycurl2":
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pycurl3":
        returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "ftp":
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "sftp":
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif httplibuse == "pysftp":
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_to_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download every URL in *httpurl* to disk via download_from_url_to_file.

    *httpurl* may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs.  Returns the list of per-URL results.
    """
    if isinstance(httpurl, dict):
        # BUGFIX: dict.values() is a non-indexable view on Python 3;
        # materialize it so the URLs can be iterated/indexed.
        httpurl = list(httpurl.values())
    elif not isinstance(httpurl, (list, tuple)):
        # A bare string (or any other scalar) is treated as one URL.
        httpurl = [httpurl]
    returnval = []
    for cururl in httpurl:
        returnval.append(download_from_url_to_file(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, outfile, outpath, ranges, buffersize, sleep, timeout))
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* into memory with urllib.

    Builds a cookie-aware opener, streams the response in *buffersize*
    chunks, transparently decompresses known Content-Encodings, and
    returns a result dict (Type "Content") — or False on URL/socket
    errors.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # BUGFIX: the fallback paths below previously called
    # httpuseragent.update(...) (a string has no .update), leaving the
    # header unset and raising AttributeError; assign into httpheaders.
    if httpuseragent is not None:
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        # Inline credentials become an HTTP Basic Authorization header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl)
        if httpmethod == "GET":
            geturls_text = geturls_opener.open(geturls_request)
        elif httpmethod == "POST":
            geturls_text = geturls_opener.open(geturls_request, data=postdata)
        else:
            geturls_text = geturls_opener.open(geturls_request)
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry a body/headers; keep reading them.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    try:
        httpcodereason = geturls_text.reason
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode())
    try:
        httpversionout = geturls_text.version
    except AttributeError:
        httpversionout = "1.1"
    httpmethodout = geturls_request.get_method()
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if sys.version[0] == "2":
        # Python 2's mimetools.Message is not a real dict; rebuild one.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Best-effort transparent decompression keyed on Content-Encoding;
    # on failure the raw bytes are returned unchanged.
    contentencoding = httpheaderout.get("Content-Encoding")
    if contentencoding == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif contentencoding == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif contentencoding == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif contentencoding == "zstd" and havezstd:
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif (contentencoding == "lzma" or contentencoding == "xz") and havelzma:
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: previously caught zstandard.error here, which is the
            # wrong exception and a NameError when zstandard is absent.
            pass
    elif contentencoding == "bzip2":
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: previously caught zstandard.error (see above);
            # bz2.decompress raises OSError/ValueError on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* with urllib into a uniquely-named temporary file.

    Delegates the network work to download_from_url_with_urllib, writes
    the content to a NamedTemporaryFile (suffix derived from a SHA-1 of
    the URL/buffersize/start time), mirrors the server's Last-Modified
    timestamp onto the file when parseable, and returns a result dict
    (Type "File") — or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix so concurrent downloads never collide.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file;
            # fall back to manual parsing on older Pythons, and give up
            # silently when the header is absent or unparseable.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
        f.close()
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, a negative
    # duration in both the log line and the DownloadTime fields).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
1133 def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
1134 global geturls_download_sleep, havezstd, havebrotli;
1135 if(sleep<0):
1136 sleep = geturls_download_sleep;
1137 if(timeout<=0):
1138 timeout = 10;
1139 if(not outfile=="-"):
1140 outpath = outpath.rstrip(os.path.sep);
1141 filepath = os.path.realpath(outpath+os.path.sep+outfile);
1142 if(not os.path.exists(outpath)):
1143 os.makedirs(outpath);
1144 if(os.path.exists(outpath) and os.path.isfile(outpath)):
1145 return False;
1146 if(os.path.exists(filepath) and os.path.isdir(filepath)):
1147 return False;
1148 pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1149 if(not pretmpfilename):
1150 return False;
1151 tmpfilename = pretmpfilename.get('Filename');
1152 downloadsize = int(os.path.getsize(tmpfilename));
1153 fulldatasize = 0;
1154 log.info("Moving file "+tmpfilename+" to "+filepath);
1155 exec_time_start = time.time();
1156 shutil.move(tmpfilename, filepath);
1157 try:
1158 os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
1159 except AttributeError:
1160 try:
1161 os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1162 except ValueError:
1163 pass;
1164 except ValueError:
1165 pass;
1166 exec_time_end = time.time();
1167 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
1168 if(os.path.exists(tmpfilename)):
1169 os.remove(tmpfilename);
1170 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
1171 if(outfile=="-"):
1172 pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
1173 tmpfilename = pretmpfilename.get('Filename');
1174 downloadsize = int(os.path.getsize(tmpfilename));
1175 fulldatasize = 0;
1176 prevdownsize = 0;
1177 exec_time_start = time.time();
1178 with open(tmpfilename, 'rb') as ft:
1179 f = BytesIO();
1180 while True:
1181 databytes = ft.read(buffersize[1]);
1182 if not databytes: break;
1183 datasize = len(databytes);
1184 fulldatasize = datasize + fulldatasize;
1185 percentage = "";
1186 if(downloadsize>0):
1187 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
1188 downloaddiff = fulldatasize - prevdownsize;
1189 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
1190 prevdownsize = fulldatasize;
1191 f.write(databytes);
1192 f.seek(0);
1193 fdata = f.getvalue();
1194 f.close();
1195 ft.close();
1196 os.remove(tmpfilename);
1197 exec_time_end = time.time();
1198 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
1199 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
1200 return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib (http.client) and return the body in memory.

    Returns a dict with keys 'Type', 'Content', 'Contentsize',
    'ContentsizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL',
    'Code', 'Reason' and 'HTTPLib', or False on an unsupported URL scheme or
    a connection error.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update({...}) -- strings have no
            # .update(), and the header belongs in httpheaders anyway.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above -- update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Turn inline URL credentials into an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout);
    else:
        return False;
    # NOTE(review): this urlencodes postdata only when it is NOT a dict,
    # which looks inverted (urlencode normally encodes dicts) -- kept as-is
    # because every other downloader in this file uses the same pattern.
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were being sent with the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            # Unknown methods deliberately fall back to GET (original behavior).
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except BlockingIOError:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    # BUGFIX: http.client exposes .version as the int 10 or 11, never the
    # string "10", so the old string comparison always reported "1.1".
    if(geturls_text.version==10):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = geturls_text._method;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2 header objects may need manual conversion to a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently undo any Content-Encoding the server applied; on decode
    # failure the raw bytes are returned (deliberate best-effort behavior).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # BUGFIX: was "except zstandard.error" -- the wrong module's exception
        # (a NameError when zstandard is not installed).
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # BUGFIX: was "except zstandard.error"; bz2 raises IOError/OSError or
        # ValueError on bad input.
        except (IOError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httplib backend and spool the body into a
    uniquely-named temporary file.

    Returns a dict describing the temp file ('Type': "File", 'Filename',
    'Filesize', timing and response metadata), or False when the download
    fails.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time so the temp-file suffix is unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # BUGFIX: this httplib wrapper was delegating to the urllib backend
    # (download_from_url_with_urllib); use the matching httplib downloader.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Stamp the temp file with the server's Last-Modified time.
        # NOTE(review): the later f.write() resets the mtime again; the final
        # destination file gets its own utime in the to_file wrapper.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
        f.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # Refresh the size fields from the on-disk file and record timing.
    # NOTE(review): start - end is negative; kept because every downloader in
    # this file computes timings the same way.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the httplib backend and deliver it to the caller.

    When outfile is a filename the body is moved to outpath/outfile and a
    'Type': "File" dict is returned; when outfile is "-" the body is returned
    in memory as a 'Type': "Content" dict. Returns False when the target path
    is unusable or the download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict literal listed 'Method' twice; the first value
        # (pretmpfilename.get('Method')) was silently discarded, so only the
        # effective value (httpmethod) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # ROBUSTNESS: this branch previously crashed with AttributeError when
        # the download failed; return False like the file branch does.
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl using httplib2's connection classes and return the
        body in memory (same result-dict schema as the other downloaders), or
        False on an unsupported URL scheme or a connection error.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update({...}) -- strings have no
                # .update(); the header belongs in httpheaders.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same as above -- update httpheaders.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Turn inline URL credentials into an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(urlparts[0]=="http"):
            httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout);
        else:
            return False;
        # NOTE(review): urlencodes postdata only when it is NOT a dict, which
        # looks inverted -- kept as-is for consistency with the rest of the file.
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                # BUGFIX: POST requests were being sent with the "GET" verb.
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                # Unknown methods deliberately fall back to GET (original behavior).
                httpconn.request("GET", urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except BlockingIOError:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        # BUGFIX: the response .version is the int 10 or 11, never the string
        # "10", so the old string comparison always reported "1.1".
        if(geturls_text.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = httpurl;
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects may need manual conversion to a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding; decode failures fall back
        # to the raw bytes (deliberate best-effort behavior).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # BUGFIX: was "except zstandard.error" -- wrong module's exception.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # BUGFIX: was "except zstandard.error"; bz2 raises IOError/OSError
            # or ValueError on bad input.
            except (IOError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"};
        geturls_text.close();
        return returnval;
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib downloader."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_httplib2 and spool the
        body into a uniquely-named temporary file; returns a dict describing
        that file, or False when the download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffersize + start time so the temp-file suffix is unique.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            # Python 3 hashers only accept bytes, so encode each piece.
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Stamp the temp file with the server's Last-Modified time.
            # NOTE(review): the later f.write() resets the mtime again --
            # presumably callers rely on the utime set on the final file by
            # the to_file wrapper; confirm before reordering.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # Python 2 lacks parsedate_to_datetime; fall back to strptime.
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Refresh size fields from the on-disk file and record timing.
        # NOTE(review): start - end is negative; kept as-is since every
        # downloader in this file computes timings the same way.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib file downloader."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with the httplib2 backend and deliver it either to
        outpath/outfile ('Type': "File") or, when outfile is "-", in memory
        ('Type': "Content"). Returns False when the target path is unusable
        or the download fails.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp on the moved file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # Python 2 lacks parsedate_to_datetime; fall back to strptime.
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict literal listed 'Method' twice; the first value
            # was silently discarded, so only the effective one (httpmethod)
            # is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # ROBUSTNESS: previously crashed with AttributeError on a failed
            # download; return False like the file branch does.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
                ft.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # BUGFIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib to-file downloader."""
        # BUGFIX: arguments were forwarded positionally in the wrong order
        # (buffersize landed in the outfile slot, shifting every following
        # argument and dropping ranges/timeout); keyword arguments make the
        # forwarding unambiguous.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Compatibility alias: forwards unchanged to the urllib downloader."""
    return download_from_url_with_urllib(
        httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
        httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Compatibility alias: forwards unchanged to the urllib file downloader."""
    return download_from_url_file_with_urllib(
        httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
        httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Compatibility alias: forwards to the urllib to-file downloader.

    BUG FIX: the original forwarded (postdata, buffersize, outfile, outpath,
    sleep, timeout) positionally, shifting buffersize into the outfile slot
    and dropping `ranges`.  Forward every argument in the callee's order.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
if(haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with the requests library.

        Returns a dict with keys Type, Content, Contentsize, ContentsizeAlt,
        Headers, Version, Method, HeadersSent, URL, Code, Reason and HTTPLib,
        or False on a connection error/timeout.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            # BUG FIX: the original called .update() on the user-agent string
            # itself when the header was absent; the header dict is what must
            # be updated.  A plain assignment covers both cases.
            httpheaders['User-Agent'] = httpuseragent
        if(httpreferer is not None):
            # BUG FIX: same mistake as above for the Referer header.
            httpheaders['Referer'] = httpreferer
        if(urlparts.username is not None or urlparts.password is not None):
            # Build an HTTP Basic Authorization header from URL credentials.
            if(sys.version[0] == "2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0] >= "3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        try:
            reqsession = requests.Session()
            # BUG FIX: the timeout argument was accepted but never forwarded
            # to requests; pass it through so hung connections actually abort.
            if(httpmethod == "POST"):
                geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
            else:
                # GET and any unrecognized method fall back to a plain GET,
                # matching the original behavior.
                geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
        except requests.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl)
            return False
        except requests.exceptions.ConnectionError:
            # BUG FIX: requests.exceptions.ConnectError does not exist, so the
            # original except clause raised AttributeError instead of handling
            # the connection failure.
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status_code
        httpcodereason = geturls_text.reason
        # BUG FIX: urllib3 exposes the HTTP version as an int (10 or 11);
        # the original compared against the string "10" and always fell
        # through to "1.1".
        if(geturls_text.raw.version == 10):
            httpversionout = "1.0"
        else:
            httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.url
        httpheaderout = geturls_text.headers
        httpheadersentout = geturls_text.request.headers
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
        if(sys.version[0] == "2"):
            # On Python 2 rebuild the case-insensitive mapping as a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        # Stream the body in buffersize chunks, logging progress as we go.
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.raw.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently decode any Content-Encoding the server applied.
        httpencoding = httpheaderout.get("Content-Encoding")
        if(httpencoding == "gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpencoding == "deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpencoding == "br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpencoding == "zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        elif(httpencoding in ("lzma", "xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:
                # BUG FIX: the original caught zstandard.error here; the lzma
                # module raises LZMAError.
                pass
        elif(httpencoding == "bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):
                # BUG FIX: the original caught zstandard.error here; bz2
                # raises OSError/ValueError on malformed data.
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"}
        geturls_text.close()
        return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # requests is not installed; fall back to the urllib implementation.
        return download_from_url_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, sleep, timeout)
if(haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via requests into a uniquely named temp file.

        Returns a dict describing the file (Type/Filename/Filesize/...,
        DownloadTime, Headers, ...) or False when the download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1")
        if(sys.version[0] == "2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0] >= "3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            f.write(pretmpfilename.get('Content'))
        # BUG FIX: the original called os.utime() *before* writing the
        # content, so the write immediately reset the mtime again.  Stamp
        # the server's Last-Modified time after the file is closed.
        try:
            lastmod = pretmpfilename.get('Headers').get('Last-Modified')
            lmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (lmtime, lmtime))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; parse the header manually.
            try:
                lastmod = pretmpfilename.get('Headers').get('Last-Modified')
                lmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lmtime, lmtime))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # requests is not installed; fall back to the urllib implementation.
        return download_from_url_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with requests and either move the result to
        outpath/outfile (when outfile != "-") or return its bytes in memory
        (when outfile == "-").  Returns a result dict, or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        if(not outfile == "-"):
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            # Refuse to write through a file posing as a directory, or over a
            # directory posing as the target file.
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Preserve the server's Last-Modified timestamp when available.
            try:
                lastmod = pretmpfilename.get('Headers').get('Last-Modified')
                lmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
                os.utime(filepath, (lmtime, lmtime))
            except AttributeError:
                try:
                    lastmod = pretmpfilename.get('Headers').get('Last-Modified')
                    lmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                    os.utime(filepath, (lmtime, lmtime))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # BUG FIX: the original dict literal repeated the 'Method' key, so
            # the first value was silently discarded; keep a single entry with
            # the value that effectively won (httpmethod).
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile == "-"):
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            # Robustness fix: the original never checked for a failed download
            # here and crashed on .get() below instead of returning False.
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            # Copy the temp file into memory in buffersize[1] chunks.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
            # BUG FIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when requests is unavailable: delegate to urllib.

        BUG FIX: the original forwarded (postdata, buffersize, outfile,
        outpath, sleep, timeout) positionally, shifting buffersize into the
        outfile slot and dropping `ranges`.  Forward every argument in the
        callee's declared order instead.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
if(haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with aiohttp; returns a result dict or False.

        NOTE(review): aiohttp is an asyncio library — ClientSession.get()/
        post() return awaitable request context managers, so this synchronous
        usage likely never produces a real response object.  Confirm against
        callers and consider an async rewrite (or preferring the requests/
        urllib implementations).
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            # BUG FIX: the original called .update() on the user-agent string
            # itself when the header was absent; assign into the header dict.
            httpheaders['User-Agent'] = httpuseragent
        if(httpreferer is not None):
            # BUG FIX: same mistake as above for the Referer header.
            httpheaders['Referer'] = httpreferer
        if(urlparts.username is not None or urlparts.password is not None):
            # Build an HTTP Basic Authorization header from URL credentials.
            if(sys.version[0] == "2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0] >= "3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        try:
            reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize)
            if(httpmethod == "POST"):
                geturls_text = reqsession.post(httpurl, data=postdata)
            else:
                # GET and unrecognized methods fall back to a plain GET.
                geturls_text = reqsession.get(httpurl)
        except (aiohttp.ClientError, socket.timeout):
            # BUG FIX: aiohttp has no `exceptions` submodule, so the original
            # clauses (aiohttp.exceptions.ConnectTimeout / ConnectError)
            # raised AttributeError instead of handling connection failures.
            # aiohttp.ClientError is the documented base of all client errors.
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status
        httpcodereason = geturls_text.reason
        httpversionout = geturls_text.version
        httpmethodout = geturls_text.method
        httpurlout = geturls_text.url
        httpheaderout = geturls_text.headers
        httpheadersentout = geturls_text.request_info.headers
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
        if(sys.version[0] == "2"):
            # On Python 2 rebuild the header mapping as a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        # Stream the body in buffersize chunks, logging progress as we go.
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently decode any Content-Encoding the server applied.
        httpencoding = httpheaderout.get("Content-Encoding")
        if(httpencoding == "gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpencoding == "deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpencoding == "br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpencoding == "zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        elif(httpencoding in ("lzma", "xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:
                # BUG FIX: the original caught zstandard.error here; the lzma
                # module raises LZMAError.
                pass
        elif(httpencoding == "bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):
                # BUG FIX: the original caught zstandard.error here; bz2
                # raises OSError/ValueError on malformed data.
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"}
        geturls_text.close()
        return returnval
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # aiohttp is not installed; fall back to the urllib implementation.
        return download_from_url_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, sleep, timeout)
if(haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via aiohttp into a uniquely named temp file.

        Returns a dict describing the file (Type/Filename/Filesize/...,
        DownloadTime, Headers, ...) or False when the download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1")
        if(sys.version[0] == "2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0] >= "3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            f.write(pretmpfilename.get('Content'))
        # BUG FIX: the original called os.utime() *before* writing the
        # content, so the write immediately reset the mtime again.  Stamp
        # the server's Last-Modified time after the file is closed.
        try:
            lastmod = pretmpfilename.get('Headers').get('Last-Modified')
            lmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (lmtime, lmtime))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; parse the header manually.
            try:
                lastmod = pretmpfilename.get('Headers').get('Last-Modified')
                lmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lmtime, lmtime))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # aiohttp is not installed; fall back to the urllib implementation.
        return download_from_url_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with aiohttp and either move the result to
        outpath/outfile (when outfile != "-") or return its bytes in memory
        (when outfile == "-").  Returns a result dict, or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        if(not outfile == "-"):
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            # Refuse to write through a file posing as a directory, or over a
            # directory posing as the target file.
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Preserve the server's Last-Modified timestamp when available.
            try:
                lastmod = pretmpfilename.get('Headers').get('Last-Modified')
                lmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
                os.utime(filepath, (lmtime, lmtime))
            except AttributeError:
                try:
                    lastmod = pretmpfilename.get('Headers').get('Last-Modified')
                    lmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                    os.utime(filepath, (lmtime, lmtime))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # BUG FIX: the original dict literal repeated the 'Method' key, so
            # the first value was silently discarded; keep a single entry with
            # the value that effectively won (httpmethod).
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile == "-"):
            pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            # Robustness fix: the original never checked for a failed download
            # here and crashed on .get() below instead of returning False.
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            # Copy the temp file into memory in buffersize[1] chunks.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
            # BUG FIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when aiohttp is unavailable: delegate to the urllib backend."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
if(havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with the httpx library over HTTP/1.1 and return a dict
        describing the response ('Content', 'Contentsize', 'Headers', 'Code',
        'Reason', ...), or False when the connection fails or times out.

        httpheaders may be a list of (name, value) pairs or a dict; httpuseragent
        and httpreferer, when given, override the corresponding request headers."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            # BUGFIX: the else-branch used to call httpuseragent.update(...) on a
            # string; a plain assignment covers both the present and absent case.
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            # BUGFIX: same as above -- the Referer header was never added when missing.
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Send HTTP Basic credentials embedded in the URL pre-emptively.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            if(httpmethod=="POST"):
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                # GET and any unrecognized method are handled as a plain GET.
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        try:
            httpcodereason = geturls_text.reason_phrase;
        except AttributeError:
            # Older httpx versions lack reason_phrase; derive it from the code.
            httpcodereason = http_status_to_reason(geturls_text.status_code);
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # On Python 2 copy the header mapping into a plain dict.
            try:
                httpheaderout = dict(httpheaderout);
            except (AttributeError, TypeError):
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        downloadsize = int(downloadsize) if downloadsize is not None else 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            # httpx's Response.read() returns the whole body in one call, so the
            # original while/break loop collapses to a single read.
            databytes = geturls_text.read();
            if(databytes):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # BUGFIX: the response was closed twice before; close once and also
        # release the client's connection pool.
        geturls_text.close();
        httpx_pool.close();
        httpencoding = httpheaderout.get("Content-Encoding");
        if(httpencoding=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpencoding=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpencoding=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpencoding=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpencoding=="lzma" or httpencoding=="xz") and havelzma):
            try:
                # BUGFIX: this handler caught zstandard.error, which raises a
                # NameError when the zstandard package is not installed.
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                pass;
        elif(httpencoding=="bzip2"):
            try:
                # BUGFIX: likewise, catch bz2's own failure modes.
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"};
        return returnval;
if(not havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib backend."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with the httpx backend and spool the body to a
        uniquely-named temporary file; return a result dict whose 'Filename'
        points at the tempfile, or False when the download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique tempfile suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Mirror the server's Last-Modified time onto the tempfile.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            # The with-statement closes f; no explicit close needed.
        exec_time_end = time.time();
        # BUGFIX: duration is end - start (was reversed, yielding a negative time).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(havehttpx):
    def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with the httpx backend and either move the result
        into outpath/outfile (returning a 'File' dict) or, when outfile is "-",
        load it into memory (returning a 'Content' dict). Returns False on
        failure or when the target path is unusable."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Mirror the server's Last-Modified time onto the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: duration is end - start (was reversed, yielding a negative time).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict previously listed 'Method' twice; keep the last-written value.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: guard against a failed download before dereferencing it.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                # Copy the tempfile into memory in buffersize[1]-byte chunks,
                # logging progress as we go.
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: duration is end - start (was reversed, yielding a negative time).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            # BUGFIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpx):
    def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib backend."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
if(havehttpx):
    def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with the httpx library, allowing HTTP/2 in addition to
        HTTP/1.1, and return a dict describing the response ('Content',
        'Contentsize', 'Headers', 'Code', 'Reason', ...), or False when the
        connection fails or times out.

        httpheaders may be a list of (name, value) pairs or a dict; httpuseragent
        and httpreferer, when given, override the corresponding request headers."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            # BUGFIX: the else-branch used to call httpuseragent.update(...) on a
            # string; a plain assignment covers both the present and absent case.
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            # BUGFIX: same as above -- the Referer header was never added when missing.
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Send HTTP Basic credentials embedded in the URL pre-emptively.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            if(httpmethod=="POST"):
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                # GET and any unrecognized method are handled as a plain GET.
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        try:
            httpcodereason = geturls_text.reason_phrase;
        except AttributeError:
            # Older httpx versions lack reason_phrase; derive it from the code.
            httpcodereason = http_status_to_reason(geturls_text.status_code);
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # On Python 2 copy the header mapping into a plain dict.
            try:
                httpheaderout = dict(httpheaderout);
            except (AttributeError, TypeError):
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        downloadsize = int(downloadsize) if downloadsize is not None else 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            # httpx's Response.read() returns the whole body in one call, so the
            # original while/break loop collapses to a single read.
            databytes = geturls_text.read();
            if(databytes):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # BUGFIX: the response was closed twice before; close once and also
        # release the client's connection pool.
        geturls_text.close();
        httpx_pool.close();
        httpencoding = httpheaderout.get("Content-Encoding");
        if(httpencoding=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpencoding=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpencoding=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpencoding=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpencoding=="lzma" or httpencoding=="xz") and havelzma):
            try:
                # BUGFIX: this handler caught zstandard.error, which raises a
                # NameError when the zstandard package is not installed.
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                pass;
        elif(httpencoding=="bzip2"):
            try:
                # BUGFIX: likewise, catch bz2's own failure modes.
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"};
        return returnval;
if(not havehttpx):
    def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib backend."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havehttpx):
    def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with the HTTP/2-capable httpx backend and spool the
        body to a uniquely-named temporary file; return a result dict whose
        'Filename' points at the tempfile, or False when the download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique tempfile suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                # Mirror the server's Last-Modified time onto the tempfile.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            # The with-statement closes f; no explicit close needed.
        exec_time_end = time.time();
        # BUGFIX: duration is end - start (was reversed, yielding a negative time).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(havehttpx):
    def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with the HTTP/2-capable httpx backend and either move
        the result into outpath/outfile (returning a 'File' dict) or, when
        outfile is "-", load it into memory (returning a 'Content' dict).
        Returns False on failure or when the target path is unusable."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                # Mirror the server's Last-Modified time onto the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: duration is end - start (was reversed, yielding a negative time).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict previously listed 'Method' twice; keep the last-written value.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: guard against a failed download before dereferencing it.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                # Copy the tempfile into memory in buffersize[1]-byte chunks,
                # logging progress as we go.
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: duration is end - start (was reversed, yielding a negative time).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            # BUGFIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpx):
    def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib backend."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
if(havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with httpcore (HTTP/1.1 only) and return the response in memory.

        Returns False on connection failure, otherwise a dict with keys
        'Type', 'Content', 'Contentsize', 'ContentsizeAlt', 'Headers',
        'Version', 'Method', 'HeadersSent', 'URL', 'Code', 'Reason', 'HTTPLib'.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            # BUGFIX: the old else-branch called .update() on the httpuseragent
            # string instead of the httpheaders dict, silently dropping the header.
            httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            # BUGFIX: same defect as above for the Referer header.
            httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
            if(httpmethod=="POST"):
                # BUGFIX: POST requests were previously issued with the "GET" verb.
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
            else:
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header containers may only expose keys(); rebuild a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        downloadsize = int(downloadsize) if downloadsize is not None else 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                # httpcore's read() returns the whole body, so this loop runs once.
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
                break;
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Transparently undo any Content-Encoding; decode errors fall through
        # to returning the raw bytes (best-effort, matching the rest of the file).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.ZstdError:
                # BUGFIX: zstandard has no 'error' attribute; the class is ZstdError.
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was 'except zstandard.error', the wrong module's exception.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUGFIX: was 'except zstandard.error'; bz2 raises OSError/ValueError.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"};
        return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpcore into a uniquely-named temporary file.

        Returns False on failure, otherwise a dict with 'Type': "File",
        'Filename', 'Filesize', timing fields and the response metadata.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Best-effort: stamp the temp file with the server's Last-Modified time.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        # BUGFIX: durations were computed as start - end, which is negative.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via httpcore to outpath/outfile, or to memory when outfile is "-".

        Returns False on failure; otherwise a 'File' dict (on-disk result)
        or a 'Content' dict (in-memory result for outfile == "-").
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Best-effort: stamp the destination with the server's Last-Modified time.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: durations were computed as start - end, which is negative.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the original dict listed 'Method' twice; the duplicate is
            # removed and the surviving value (httpmethod) kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                # BUGFIX: a failed download previously crashed on False.get() below.
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: durations were computed as start - end, which is negative.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with httpcore (HTTP/1.1 + HTTP/2 enabled) into memory.

        Identical contract to download_from_url_with_httpcore, but the
        connection pool negotiates HTTP/2 and 'HTTPLib' is "httpcore2".
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            # BUGFIX: the old else-branch called .update() on the httpuseragent
            # string instead of the httpheaders dict, silently dropping the header.
            httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            # BUGFIX: same defect as above for the Referer header.
            httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            if(httpmethod=="POST"):
                # BUGFIX: POST requests were previously issued with the "GET" verb.
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
            else:
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header containers may only expose keys(); rebuild a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        downloadsize = int(downloadsize) if downloadsize is not None else 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                # httpcore's read() returns the whole body, so this loop runs once.
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
                break;
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Transparently undo any Content-Encoding; decode errors fall through
        # to returning the raw bytes (best-effort, matching the rest of the file).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.ZstdError:
                # BUGFIX: zstandard has no 'error' attribute; the class is ZstdError.
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was 'except zstandard.error', the wrong module's exception.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUGFIX: was 'except zstandard.error'; bz2 raises OSError/ValueError.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"};
        return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpcore (HTTP/2-capable) into a temporary file.

        Returns False on failure, otherwise a dict with 'Type': "File",
        'Filename', 'Filesize', timing fields and the response metadata.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Best-effort: stamp the temp file with the server's Last-Modified time.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        # BUGFIX: durations were computed as start - end, which is negative.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via httpcore (HTTP/2-capable) to outpath/outfile, or to memory when outfile is "-".

        Returns False on failure; otherwise a 'File' dict (on-disk result)
        or a 'Content' dict (in-memory result for outfile == "-").
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Best-effort: stamp the destination with the server's Last-Modified time.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: durations were computed as start - end, which is negative.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the original dict listed 'Method' twice; the duplicate is
            # removed and the surviving value (httpmethod) kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                # BUGFIX: a failed download previously crashed on False.get() below.
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: durations were computed as start - end, which is negative.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
# BUGFIX: this fallback previously tested 'not havehttpx', so when httpx was
# missing but httpcore was present the working httpcore2 implementation above
# was overwritten by this urllib fallback (and when httpx was present but
# httpcore missing, no fallback was installed at all).
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Alias for the urllib3-based downloader."""
        return download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Compatibility alias: forwards directly to download_from_url_file_with_urllib3."""
        return download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Compatibility alias: forwards directly to download_from_url_to_file_with_urllib3."""
        return download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with urllib3 and return a result dict with keys
        Type/Content/Contentsize/ContentsizeAlt/Headers/Version/Method/
        HeadersSent/URL/Code/Reason/HTTPLib, or False on connection,
        timeout, retry, or URL errors."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: previously called .update() on the httpuseragent string
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: previously called .update() on the httpuseragent string
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # HTTP Basic auth from inline URL credentials
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
        urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        # BUGFIX: urllib3 reports the HTTP version as the int 10/11, so the
        # old comparison against the string "10" could never match.
        if(geturls_text.version==10 or geturls_text.version=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects need rebuilding into a plain dict
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently decode the body according to Content-Encoding
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was "except zstandard.error" (wrong module)
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (IOError, OSError, ValueError):
                # BUGFIX: was "except zstandard.error"; bz2 raises IOError/OSError/ValueError
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"};
        geturls_text.close();
        return returnval;
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via urllib3 into a uniquely named temporary file
        and return a 'File' result dict (Filename, Filesize, Headers, timing
        info, ...), or False when the underlying download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL + buffer size + start time
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: set the file times from Last-Modified AFTER writing the
        # content; writing afterwards used to reset the mtime just applied.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, always negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via urllib3. When outfile is a name, move the
        download to outpath/outfile and return a 'File' dict; when outfile
        is "-", return the bytes in a 'Content' dict. False on failure."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, always negative)
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: removed duplicate 'Method' key (the literal httpmethod won)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: guard against a failed download before calling .get()
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with mechanize.Browser and return a result dict with
        keys Type/Content/Contentsize/ContentsizeAlt/Headers/Version/Method/
        HeadersSent/URL/Code/Reason/HTTPLib, or False on URL/socket errors."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: previously called .update() on the httpuseragent string
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: previously called .update() on the httpuseragent string
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # HTTP Basic auth from inline URL credentials
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            # HTTP error responses still carry a readable body; keep going
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpcodereason = geturls_text.msg;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        reqhead = geturls_opener.request;
        httpheadersentout = reqhead.header_items();
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2 header objects need rebuilding into a plain dict
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently decode the body according to Content-Encoding
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was "except zstandard.error" (wrong module)
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (IOError, OSError, ValueError):
                # BUGFIX: was "except zstandard.error"; bz2 raises IOError/OSError/ValueError
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"};
        geturls_text.close();
        return returnval;
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is not installed: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via mechanize into a uniquely named temporary file
        and return a 'File' result dict (Filename, Filesize, Headers, timing
        info, ...), or False when the underlying download fails."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL + buffer size + start time
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: set the file times from Last-Modified AFTER writing the
        # content; writing afterwards used to reset the mtime just applied.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, always negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is not installed: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via mechanize. When outfile is a name, move the
        download to outpath/outfile and return a 'File' dict; when outfile
        is "-", return the bytes in a 'Content' dict. False on failure."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was start - end, always negative)
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: removed duplicate 'Method' key (the literal httpmethod won)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: guard against a failed download before calling .get()
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            # BUGFIX: 'HeadersSent' was the literal list ['HeadersSent']; also
            # removed the duplicate 'Method' key.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when mechanize is unavailable: delegate to the urllib implementation."""
        # BUGFIX: the old call passed (postdata, buffersize, outfile, outpath, sleep,
        # timeout), dropping `ranges` and shifting every later argument one slot.
        # Forward the parameters in the same order as this function's own signature
        # (the *_to_file_* functions in this file all share that signature).
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl and return a result dict (keys: Type, Content,
        Contentsize, ContentsizeAlt, Headers, Version, Method, HeadersSent, URL,
        Code, Reason, HTTPLib) or False on a connection/timeout/parse error.
        Transparently decompresses gzip/deflate (and br/zstd/lzma/xz/bzip2 when
        the matching module is available)."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: previously called httpuseragent.update(...) — a str has no
                # .update(); the header dict is what must be updated.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same wrong-object .update() as above.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline HTTP Basic auth credentials from the URL.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            # Shared cURL setup; any method other than POST is sent as a plain
            # (GET-style) request, matching the previous three-way branch where
            # the GET and "else" branches were identical.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            if(httpmethod=="POST"):
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # First header line is the status line, e.g. "HTTP/1.1 200 OK".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except pycurl.error:
            # BUGFIX: pycurl reports timeouts/DNS/connect failures through its own
            # exception type, which the socket.* handlers above never catch.
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the header dict key-by-key (keys() is a list there).
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort transparent decoding of the advertised Content-Encoding;
        # on a decode failure the raw bytes are returned unchanged.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was `except zstandard.error` — wrong module (and a
                # NameError when zstandard is not installed).
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (ValueError, OSError):
                # BUGFIX: was `except zstandard.error`; bz2.decompress raises
                # ValueError/OSError on invalid data.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"};
        geturls_text.close();
        return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with pycurl into a uniquely-named temporary file and
        return a result dict (Type "File", Filename, Filesize, timings, response
        metadata) or False on failure. The caller removes the temp file."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: apply the server's Last-Modified timestamp AFTER writing and
        # closing the file; the old code called os.utime() first, so the write
        # immediately clobbered the mtime it had just set.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (the old code logged/stored a
        # negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl):
    def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with pycurl and move the result to outpath/outfile,
        returning a result dict (Type "File") or False. When outfile is "-",
        return the content in-memory instead (Type "Content")."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp on the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: the dict literal contained 'Method' twice; only the second
            # value (httpmethod) ever took effect, so keep that one.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # ROBUSTNESS: the "-" branch previously crashed with an AttributeError
            # on a failed download (False has no .get()).
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (was negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            # BUGFIX: duplicate 'Method' key removed (see above).
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib implementation."""
        # BUGFIX: the old call passed (postdata, buffersize, outfile, outpath, sleep,
        # timeout), dropping `ranges` and shifting every later argument one slot.
        # Forward the parameters in the same order as this function's own signature.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl requesting HTTP/2 (CURL_HTTP_VERSION_2_0)
        and return a result dict (keys: Type, Content, Contentsize, ContentsizeAlt,
        Headers, Version, Method, HeadersSent, URL, Code, Reason, HTTPLib) or
        False on a connection/timeout/parse error."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: previously called httpuseragent.update(...) — a str has no
                # .update(); the header dict is what must be updated.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: same wrong-object .update() as above.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline HTTP Basic auth credentials from the URL.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            # Shared cURL setup (HTTP/2 requested); any method other than POST is
            # sent as a plain (GET-style) request, matching the previous three-way
            # branch where the GET and "else" branches were identical.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            if(httpmethod=="POST"):
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # First header line is the status line, e.g. "HTTP/2 200".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except pycurl.error:
            # BUGFIX: pycurl reports timeouts/DNS/connect failures through its own
            # exception type, which the socket.* handlers above never catch.
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the header dict key-by-key (keys() is a list there).
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort transparent decoding of the advertised Content-Encoding;
        # on a decode failure the raw bytes are returned unchanged.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUGFIX: was `except zstandard.error` — wrong module (and a
                # NameError when zstandard is not installed).
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (ValueError, OSError):
                # BUGFIX: was `except zstandard.error`; bz2.decompress raises
                # ValueError/OSError on invalid data.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"};
        geturls_text.close();
        return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """This libcurl build lacks HTTP/2 support, so fall back to the plain
        pycurl (HTTP/1.x) implementation."""
        # CONSISTENCY FIX: the file-based variant of this same fallback
        # (download_from_url_file_with_pycurl2 when CURL_HTTP_VERSION_2_0 is
        # missing) delegates to its pycurl counterpart; this one fell back to
        # urllib even though pycurl itself is available.
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with pycurl (HTTP/2) into a uniquely-named temporary
        file and return a result dict (Type "File", Filename, Filesize, timings,
        response metadata) or False on failure. The caller removes the temp file."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            f.write(pretmpfilename.get('Content'));
        # BUGFIX: apply the server's Last-Modified timestamp AFTER writing and
        # closing the file; the old code called os.utime() first, so the write
        # immediately clobbered the mtime it had just set.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (the old code logged/stored a
        # negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """This libcurl build lacks HTTP/2 support: delegate to the plain pycurl implementation."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with the HTTP/2 pycurl backend.

        When outfile is a name, the temporary download is moved to
        outpath/outfile and a 'File' result dict is returned; when outfile is
        "-", the bytes are returned in a 'Content' result dict.
        Returns False on any failure.

        Fixes vs the original: the result dicts listed 'Method' twice (the
        first value was dead); elapsed times were computed as start - end
        (negative); the "-" branch did not guard a failed download.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp when parseable.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (original logged a negative duration).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: single 'Method' key (original had a dead duplicate entry).
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback shim: pycurl is not installed, so delegate to the urllib
        to-file implementation.

        NOTE(review): arguments are passed positionally as (buffersize, outfile,
        outpath), which differs from the (outfile, outpath, ranges, buffersize)
        order of the pycurl2/pycurl3 signatures in this file — verify against
        download_from_url_to_file_with_urllib's actual signature.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback shim: no HTTP/2 in this libcurl build, so delegate to the
        plain pycurl to-file implementation.

        NOTE(review): the positional order (buffersize, outfile, outpath) does
        not match the (outfile, outpath, ranges, buffersize) order of the
        pycurl2/pycurl3 signatures visible in this file — confirm against the
        target's signature before relying on this path.
        """
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl over HTTP/3 via pycurl and return a 'Content' result
        dict (Content/Headers/Code/Reason/...), or False on timeout, DNS or
        parse errors.

        httpheaders may be a list or dict; httpuseragent/httpreferer override
        the corresponding headers; non-dict postdata is urlencoded. URL-embedded
        credentials become a Basic Authorization header.

        Fixes vs the original: the User-Agent/Referer fallbacks mutated the
        httpuseragent string instead of httpheaders; the lzma and bzip2
        decompression guards caught zstandard.error (a NameError when zstandard
        is absent, and the wrong type regardless).
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        # BUGFIX: always write the override into httpheaders (the original's
        # else-branch called .update() on the httpuseragent string itself).
        if(httpuseragent is not None):
            httpheaders['User-Agent'] = httpuseragent;
        if(httpreferer is not None):
            httpheaders['Referer'] = httpreferer;
        if(urlparts.username is not None or urlparts.password is not None):
            # Turn URL-embedded credentials into a Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            # One shared setup; only POST adds extra options (the original's
            # GET and fallback branches were byte-identical).
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            if(httpmethod=="POST"):
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: materialize the header mapping key by key.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently decode any declared Content-Encoding; a decode failure
        # leaves the raw bytes in place (best-effort, as elsewhere in the file).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:  # BUGFIX: was zstandard.error
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, IOError, ValueError):  # BUGFIX: was zstandard.error
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"};
        geturls_text.close();
        return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback shim: pycurl is not installed, so the HTTP/3 fetch is
        served by the urllib implementation with identical arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback shim: HTTP/3 is unavailable but HTTP/2 is, so delegate to
        the pycurl2 implementation unchanged."""
        return download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback shim: neither HTTP/3 nor HTTP/2 is available in this
        libcurl build, so delegate to the plain pycurl implementation."""
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via the HTTP/3 pycurl backend into a uniquely-named
        temporary file and return a 'File' result dict describing it, or False
        when the underlying fetch fails.

        Fix vs the original: elapsed time was computed as start - end, so the
        log line and 'DownloadTime' carried a negative duration.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Preserve the server's Last-Modified timestamp when parseable.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback shim: pycurl is not installed, so the HTTP/3 file download
        is served by the urllib implementation with identical arguments."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback shim: HTTP/3 is unavailable but HTTP/2 is, so delegate to
        the pycurl2 file-download implementation unchanged."""
        return download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback shim: neither HTTP/3 nor HTTP/2 is available, so delegate
        to the plain pycurl file-download implementation."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with the HTTP/3 pycurl backend.

        When outfile is a name, the temporary download is moved to
        outpath/outfile and a 'File' result dict is returned; when outfile is
        "-", the bytes are returned in a 'Content' result dict.
        Returns False on any failure.

        Fixes vs the original: the result dicts listed 'Method' twice (the
        first value was dead); elapsed times were computed as start - end
        (negative); the "-" branch did not guard a failed download.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp when parseable.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time is end - start (original logged a negative duration).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: single 'Method' key (original had a dead duplicate entry).
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepycurl):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback shim: pycurl is not installed, so delegate to the urllib
        to-file implementation.

        NOTE(review): arguments are passed positionally as (buffersize, outfile,
        outpath), which differs from the (outfile, outpath, ranges, buffersize)
        order of the pycurl2/pycurl3 signatures in this file — verify against
        download_from_url_to_file_with_urllib's actual signature.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/3 unavailable but HTTP/2 present: delegate to the pycurl2
        to-file implementation.

        BUGFIX: the original defined (and called) the wrong name
        (download_from_url_to_file_with_pycurl2), clobbering the real pycurl2
        definition and recursing forever, and passed arguments in an order
        that did not match the pycurl2 signature (outfile, outpath, ranges,
        buffersize).
        """
        return download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Neither HTTP/3 nor HTTP/2 available: delegate to the plain pycurl
        to-file implementation.

        BUGFIX: the original defined (and called) download_from_url_to_file_with_pycurl
        itself — clobbering the real definition and recursing forever — instead
        of defining the pycurl3 alias. Arguments are forwarded in the
        (outfile, outpath, ranges, buffersize) order used by the pycurl2/
        pycurl3 signatures in this file; assumed to match the pycurl variant.
        """
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
def download_file_from_ftp_file(url):
    """Retrieve an ftp:// or ftps:// URL and return its contents as a BytesIO
    object rewound to the start, or False on unsupported scheme or
    connection failure.

    Fixes vs the original: the error handlers referenced an undefined name
    (httpurl — the parameter is url), and login() was called with the raw
    urlparts credentials, leaving the computed anonymous-login defaults
    (ftp_username/ftp_password) dead.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # Default to anonymous FTP when the URL carries no credentials.
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        # Any other scheme (including http/https) is unsupported here.
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # BUGFIX: was "httpurl" (undefined) in both handlers.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    # BUGFIX: use the computed credentials so anonymous defaults apply.
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def download_file_from_ftp_string(url):
    """Return the bytes of the remote FTP file at url, or False if the
    download failed.

    BUGFIX: the original called .read() unconditionally, raising
    AttributeError when download_file_from_ftp_file returned False.
    """
    ftpfile = download_file_from_ftp_file(url);
    if(not ftpfile):
        return False;
    return ftpfile.read();
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch an ftp:// or ftps:// URL and return a 'Content' result dict
    shaped like the HTTP backends' results, or False on failure.

    httpheaders/httpuseragent/httpreferer/httpcookie/httpmethod/postdata are
    accepted for signature parity with the HTTP backends; FTP itself uses
    none of them for the transfer.

    Fixes vs the original: the User-Agent/Referer fallbacks mutated the
    httpuseragent string instead of httpheaders; the result dict now also
    carries 'Reason' and 'HTTPLib' keys for consistency with every other
    backend in this file.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    # BUGFIX: write overrides into httpheaders (the original's else-branches
    # called .update() on the httpuseragent string itself).
    if(httpuseragent is not None):
        httpheaders['User-Agent'] = httpuseragent;
    if(httpreferer is not None):
        httpheaders['Referer'] = httpreferer;
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_text = download_file_from_ftp_file(httpurl);
    if(not geturls_text):
        return False;
    # FTP gives no Content-Length up front, so the progress log has no total.
    downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None, 'Reason': None, 'HTTPLib': "ftp"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an FTP URL into a uniquely-named temporary file.

    Delegates the transfer to download_from_url_with_ftp(), writes the returned
    content to a NamedTemporaryFile (kept on disk; delete=False), and tries to
    stamp the file with the server's Last-Modified time.  Returns a result dict
    with Type "File" describing the temp file, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time so each call gets a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Best-effort: mirror the server's Last-Modified header onto the temp file.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
        f.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an FTP URL to outpath/outfile, or into memory when outfile is "-".

    Returns a result dict with Type "File" (saved to disk) or Type "Content"
    (outfile=="-", content returned in-memory); False on any failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: removed duplicate 'Method' key whose second value (None)
        # always clobbered the real method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # BUGFIX: this branch previously dereferenced a False result and crashed.
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            # BUGFIX: elapsed time was computed as start - end (always negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: removed duplicate 'Method' key (see above).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def upload_file_to_ftp_file(ftpfile, url):
    """Upload a file-like object to an ftp:// or ftps:// URL.

    Credentials come from the URL, defaulting to anonymous.  Returns the
    (rewound) file object on success, False on unsupported scheme or
    connection failure.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
        return False;
    # BUGFIX: log in with the computed credentials (anonymous fallback);
    # previously the raw urlparts values were used and the fallback was dead code.
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
    """Upload a bytes payload to an FTP/FTPS URL; thin wrapper over upload_file_to_ftp_file()."""
    payload_buffer = BytesIO(ftpstring);
    uploaded = upload_file_to_ftp_file(payload_buffer, url);
    payload_buffer.close();
    return uploaded;
if(haveparamiko):
    def download_file_from_sftp_file(url):
        """Fetch an sftp:// URL with paramiko and return its contents as a rewound BytesIO, or False."""
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        ssh = paramiko.SSHClient();
        ssh.load_system_host_keys();
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
        try:
            # BUGFIX: connect with the computed credentials (anonymous fallback);
            # previously the raw urlparts values were used and the fallback was dead code.
            ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
            return False;
        sftp = ssh.open_sftp();
        sftpfile = BytesIO();
        sftp.getfo(urlparts.path, sftpfile);
        sftp.close();
        ssh.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def download_file_from_sftp_file(url):
        """Fallback stub: paramiko is not installed, so SFTP downloads always fail."""
        return False;
if(haveparamiko):
    def download_file_from_sftp_string(url):
        """Fetch an sftp:// URL and return its contents as bytes, or False on failure."""
        sftpfile = download_file_from_sftp_file(url);
        # Guard: the file helper returns False on failure, which has no .read().
        if(not sftpfile):
            return False;
        return sftpfile.read();
else:
    # BUGFIX: the fallback was misnamed download_file_from_ftp_string, leaving
    # download_file_from_sftp_string undefined when paramiko is missing.
    def download_file_from_sftp_string(url):
        """Fallback stub: paramiko is not installed."""
        return False;
if(haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download an sftp:// URL and return a Type "Content" result dict, or False.

        The HTTP-style header/cookie/method parameters are accepted for API
        symmetry with the HTTP download functions; only the URL is used for
        the actual SFTP transfer.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUGFIX: was httpuseragent.update(...) — mutating the user-agent
                # string instead of the header dict (AttributeError).
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong target object.
                httpheaders.update({'Referer': httpreferer});
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_text = download_file_from_sftp_file(httpurl);
        if(not geturls_text):
            return False;
        downloadsize = None;
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        geturls_text.close();
        return returnval;
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub: paramiko is not installed, so SFTP downloads always fail."""
        return False;
if(haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download an sftp:// URL into a uniquely-named temporary file.

        Delegates the transfer to download_from_url_with_sftp() and writes the
        returned content to a NamedTemporaryFile, stamping the server's
        Last-Modified time when available.  Returns a Type "File" result dict
        or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffersize + start time so each call gets a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Best-effort: mirror the server's Last-Modified header onto the temp file.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        # BUGFIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub: paramiko is not installed, so SFTP downloads always fail."""
        return False;
if(haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download an sftp:// URL to outpath/outfile, or into memory when outfile is "-".

        Returns a Type "File" result dict (saved to disk) or Type "Content"
        (in-memory); False on any failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            exec_time_end = time.time();
            # BUGFIX: elapsed time was computed as start - end (always negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: removed duplicate 'Method' key whose second value (None)
            # always clobbered the real method.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            # BUGFIX: this branch previously dereferenced a False result and crashed.
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
                ft.close();
                os.remove(tmpfilename);
                exec_time_end = time.time();
                # BUGFIX: elapsed time was computed as start - end (always negative).
                log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            # BUGFIX: removed duplicate 'Method' key (see above).
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub: paramiko is not installed, so SFTP downloads always fail."""
        return False;
if(haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        """Upload a file-like object to an sftp:// URL via paramiko.

        Returns the (rewound) file object on success, False on unsupported
        scheme or connection failure.
        """
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        sftp_port = urlparts.port;
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        ssh = paramiko.SSHClient();
        ssh.load_system_host_keys();
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
        try:
            # BUGFIX: connect with the computed credentials (anonymous fallback);
            # previously the raw urlparts values were used and the fallback was dead code.
            ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
            return False;
        sftp = ssh.open_sftp();
        sftp.putfo(sftpfile, urlparts.path);
        sftp.close();
        ssh.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def upload_file_to_sftp_file(sftpfile, url):
        """Fallback stub: paramiko is not installed, so SFTP uploads always fail."""
        return False;
if(haveparamiko):
    def upload_file_to_sftp_string(sftpstring, url):
        """Upload a bytes payload to an sftp:// URL; thin wrapper over upload_file_to_sftp_file()."""
        sftpfileo = BytesIO(sftpstring);
        # BUGFIX: called nonexistent upload_file_to_sftp_files() with the
        # undefined name ftpfileo (copy/paste from the FTP variant).
        sftpfile = upload_file_to_sftp_file(sftpfileo, url);
        sftpfileo.close();
        return sftpfile;
else:
    # BUGFIX: the fallback stub was missing the sftpstring parameter, so any
    # caller using the two-argument API would raise TypeError.
    def upload_file_to_sftp_string(sftpstring, url):
        """Fallback stub: paramiko is not installed, so SFTP uploads always fail."""
        return False;
if(havepysftp):
    def download_file_from_pysftp_file(url):
        """Fetch an sftp:// URL with pysftp and return its contents as a rewound BytesIO, or False."""
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        try:
            # BUGFIX: the Connection object was discarded and the code then
            # used an undefined name ssh; keep the connection and use it.
            sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);  # BUGFIX: was undefined name httpurl
            return False;
        sftpfile = BytesIO();
        # pysftp.Connection exposes the paramiko SFTPClient getfo() directly.
        sftp.getfo(urlparts.path, sftpfile);
        sftp.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def download_file_from_pysftp_file(url):
        """Fallback stub: pysftp is not installed, so pysftp downloads always fail."""
        return False;
if(havepysftp):
    def download_file_from_pysftp_string(url):
        """Fetch an sftp:// URL via pysftp and return its contents as bytes, or False on failure."""
        sftpfile = download_file_from_pysftp_file(url);
        # Guard: the file helper returns False on failure, which has no .read().
        if(not sftpfile):
            return False;
        return sftpfile.read();
else:
    # BUGFIX: the fallback was misnamed download_file_from_ftp_string, leaving
    # download_file_from_pysftp_string undefined when pysftp is missing.
    def download_file_from_pysftp_string(url):
        """Fallback stub: pysftp is not installed."""
        return False;
if(havepysftp):
    def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download an sftp:// URL via pysftp and return a Type "Content" result dict, or False.

        The header/cookie/method parameters exist for API symmetry with the
        HTTP download functions; only the URL drives the SFTP transfer.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        # Normalise headers (list -> dict -> canonical names -> list) for symmetry.
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        remotefile = download_file_from_pysftp_file(httpurl);
        if(not remotefile):
            return False;
        downloadsize = None;
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if(downloadsize is None):
            downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as membuf:
            while True:
                chunk = remotefile.read(buffersize);
                if(not chunk):
                    break;
                fulldatasize = fulldatasize + len(chunk);
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                membuf.write(chunk);
            membuf.seek(0);
            returnval_content = membuf.read();
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        remotefile.close();
        return returnval;
if(not havepysftp):
    def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub: pysftp is not installed, so pysftp downloads always fail."""
        return False;
if(havepysftp):
    def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download an sftp:// URL via pysftp into a uniquely-named temporary file.

        Delegates the transfer to download_from_url_with_pysftp() and writes the
        returned content to a NamedTemporaryFile, stamping the server's
        Last-Modified time when available.  Returns a Type "File" result dict
        or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash URL + buffersize + start time so each call gets a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        # BUGFIX: the call forwarded httpuseragent/httpreferer, which are not
        # parameters of the pysftp variant (NameError at runtime).
        pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Best-effort: mirror the server's Last-Modified header onto the temp file.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
            f.close();
        exec_time_end = time.time();
        # BUGFIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
if(not havepysftp):
    def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub: pysftp is not installed, so pysftp downloads always fail."""
        return False;
5406 if(havepysftp):
5407 def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
5408 global geturls_download_sleep, havezstd, havebrotli;
5409 if(sleep<0):
5410 sleep = geturls_download_sleep;
5411 if(timeout<=0):
5412 timeout = 10;
5413 if(not outfile=="-"):
5414 outpath = outpath.rstrip(os.path.sep);
5415 filepath = os.path.realpath(outpath+os.path.sep+outfile);
5416 if(not os.path.exists(outpath)):
5417 os.makedirs(outpath);
5418 if(os.path.exists(outpath) and os.path.isfile(outpath)):
5419 return False;
5420 if(os.path.exists(filepath) and os.path.isdir(filepath)):
5421 return False;
5422 pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
5423 if(not pretmpfilename):
5424 return False;
5425 tmpfilename = pretmpfilename.get('Filename');
5426 downloadsize = int(os.path.getsize(tmpfilename));
5427 fulldatasize = 0;
5428 log.info("Moving file "+tmpfilename+" to "+filepath);
5429 exec_time_start = time.time();
5430 shutil.move(tmpfilename, filepath);
5431 exec_time_end = time.time();
5432 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
5433 if(os.path.exists(tmpfilename)):
5434 os.remove(tmpfilename);
5435 returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
5436 if(outfile=="-"):
5437 pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
5438 tmpfilename = pretmpfilename.get('Filename');
5439 downloadsize = int(os.path.getsize(tmpfilename));
5440 fulldatasize = 0;
5441 prevdownsize = 0;
5442 exec_time_start = time.time();
5443 with open(tmpfilename, 'rb') as ft:
5444 f = BytesIO();
5445 while True:
5446 databytes = ft.read(buffersize[1]);
5447 if not databytes: break;
5448 datasize = len(databytes);
5449 fulldatasize = datasize + fulldatasize;
5450 percentage = "";
5451 if(downloadsize>0):
5452 percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
5453 downloaddiff = fulldatasize - prevdownsize;
5454 log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
5455 prevdownsize = fulldatasize;
5456 f.write(databytes);
5457 f.seek(0);
5458 fdata = f.getvalue();
5459 f.close();
5460 ft.close();
5461 os.remove(tmpfilename);
5462 exec_time_end = time.time();
5463 log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
5464 returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
5465 return returnval;
if(not havepysftp):
 def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
  # Fallback stub installed when the pysftp module could not be imported:
  # keeps the API surface identical but always reports failure.
  return False;
if(havepysftp):
 def upload_file_to_pysftp_file(sftpfile, url):
  '''
  Upload the file-like object sftpfile to the path named by an sftp:// URL.

  Returns sftpfile (rewound to offset 0) on success, False on any failure
  (non-sftp scheme, SSH error, DNS failure, or timeout).
  '''
  urlparts = urlparse.urlparse(url);
  file_name = os.path.basename(urlparts.path);
  file_dir = os.path.dirname(urlparts.path);
  # Only sftp:// URLs are supported by this uploader.
  if(urlparts.scheme=="http" or urlparts.scheme=="https"):
   return False;
  if(urlparts.scheme!="sftp"):
   return False;
  if(urlparts.port is None):
   sftp_port = 22;
  else:
   sftp_port = urlparts.port;
  if(urlparts.username is not None):
   sftp_username = urlparts.username;
  else:
   sftp_username = "anonymous";
  if(urlparts.password is not None):
   sftp_password = urlparts.password;
  elif(urlparts.password is None and urlparts.username=="anonymous"):
   sftp_password = "anonymous";
  else:
   sftp_password = "";
  try:
   # BUGFIX: the Connection object was never assigned to a name, and the
   # computed anonymous-default credentials (sftp_username/sftp_password)
   # were ignored in favour of the raw URL fields.
   sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
  except paramiko.ssh_exception.SSHException:
   return False;
  except socket.gaierror:
   # BUGFIX: these log lines referenced an undefined name 'httpurl';
   # the parameter is 'url'.
   log.info("Error With URL "+url);
   return False;
  except socket.timeout:
   log.info("Error With URL "+url);
   return False;
  # BUGFIX: previously called ssh.open_sftp()/ssh.close() on an undefined
  # name 'ssh'; the pysftp.Connection object itself exposes putfo().
  sftp.putfo(sftpfile, urlparts.path);
  sftp.close();
  sftpfile.seek(0, 0);
  return sftpfile;
else:
 def upload_file_to_pysftp_file(sftpfile, url):
  # Fallback stub when pysftp is unavailable; always reports failure.
  return False;
if(havepysftp):
 def upload_file_to_pysftp_string(sftpstring, url):
  '''
  Upload the bytes payload sftpstring to the path named by an sftp:// URL.

  Wraps the payload in a BytesIO and delegates to upload_file_to_pysftp_file.
  Returns that function's result (the file object on success, False on failure).
  '''
  sftpfileo = BytesIO(sftpstring);
  # BUGFIX: previously called a misspelled 'upload_file_to_pysftp_files'
  # and passed the undefined name 'ftpfileo' instead of 'sftpfileo',
  # so this function raised NameError on every call.
  sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
  sftpfileo.close();
  return sftpfile;
else:
 def upload_file_to_pysftp_string(sftpstring, url):
  # Fallback stub when pysftp is unavailable; always reports failure.
  # BUGFIX: the stub previously took only (url), so two-argument callers
  # got a TypeError instead of the documented False return.
  return False;