4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
24 from cgi
import parse_qsl
;
27 from urlparse
import parse_qsl
;
29 from urllib
.parse
import parse_qsl
;
30 except (DeprecationWarning, TypeError):
32 from urlparse
import parse_qsl
;
34 from urllib
.parse
import parse_qsl
;
41 havemechanize
= False;
46 havemechanize
= False;
74 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
114 if(sys
.version
[0]=="2"):
116 from io
import StringIO
, BytesIO
;
119 from cStringIO
import StringIO
;
120 from cStringIO
import StringIO
as BytesIO
;
122 from StringIO
import StringIO
;
123 from StringIO
import StringIO
as BytesIO
;
124 # From http://python-future.org/compatible_idioms.html
125 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
126 from urllib
import urlencode
;
127 from urllib
import urlopen
as urlopenalt
;
128 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
129 import urlparse
, cookielib
;
130 from httplib
import HTTPConnection
, HTTPSConnection
;
131 if(sys
.version
[0]>="3"):
132 from io
import StringIO
, BytesIO
;
133 # From http://python-future.org/compatible_idioms.html
134 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
135 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
136 from urllib
.error
import HTTPError
, URLError
;
137 import urllib
.parse
as urlparse
;
138 import http
.cookiejar
as cookielib
;
139 from http
.client
import HTTPConnection
, HTTPSConnection
;
141 __program_name__
= "PyWWW-Get";
142 __program_alt_name__
= "PyWWWGet";
143 __program_small_name__
= "wwwget";
144 __project__
= __program_name__
;
145 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
146 __version_info__
= (2, 0, 2, "RC 1", 1);
147 __version_date_info__
= (2023, 10, 5, "RC 1", 1);
148 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
149 __revision__
= __version_info__
[3];
150 __revision_id__
= "$Id$";
151 if(__version_info__
[4] is not None):
152 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
153 if(__version_info__
[4] is None):
154 __version_date_plusrc__
= __version_date__
;
155 if(__version_info__
[3] is not None):
156 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
157 if(__version_info__
[3] is None):
158 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
160 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
162 pytempdir
= tempfile
.gettempdir();
164 PyBitness
= platform
.architecture();
165 if(PyBitness
=="32bit" or PyBitness
=="32"):
167 elif(PyBitness
=="64bit" or PyBitness
=="64"):
172 compression_supported_list
= ['identity', 'gzip', 'deflate', 'bzip2'];
174 compression_supported_list
.append('br');
176 compression_supported_list
.append('zstd');
178 compression_supported_list
.append('lzma');
179 compression_supported_list
.append('xz');
180 compression_supported
= ', '.join(compression_supported_list
);
# Shared cookie jar used by the download helpers in this module.
geturls_cj = cookielib.CookieJar();
# Per-Windows-version User-Agent fragments plus matching Client Hint
# (SEC-CH-UA-*) header add-ons that are merged into the browser header
# sets defined further below.
# NOTE(review): every *_ua_addon dict literal repeats the
# 'SEC-CH-UA-PLATFORM' key, so the second value (the version string)
# silently overwrites "Windows" -- the second key was presumably meant to
# be 'SEC-CH-UA-PLATFORM-VERSION'; confirm before relying on these headers.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): version below says "5.1.0" although the UA string is NT 5.2;
# looks like a copy/paste slip in the original -- left as-is.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "11.0.0"};
# Canned User-Agent strings for common browsers, all claiming the
# Windows 7 x64 platform string defined above.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Honest self-identifying User-Agent for this tool.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Detect the Python implementation name; fall back to "Python" when
# platform.python_implementation() reports an empty string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
# Alternative self-identifying User-Agent including OS, arch and Python info.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot User-Agent strings (current and legacy forms).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Module-wide default User-Agent.
geturls_ua = geturls_ua_firefox_windows7;
# Full HTTP request-header presets, one per impersonated browser.
# Chromium-family presets also carry Client Hint (SEC-CH-UA-*) headers and
# get the Windows 7 add-on dict merged in via .update() below.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Self-identifying presets; Accept-Encoding is "none" so responses arrive
# uncompressed.  NOTE(review): these dict literals also repeat the
# 'SEC-CH-UA-PLATFORM' key, so the str(__version__) value wins -- likely
# meant to be 'SEC-CH-UA-PLATFORM-VERSION'; confirm before relying on it.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide default header preset and inter-download sleep (seconds).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
240 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
241 if(outtype
=="print" and dbgenable
):
244 elif(outtype
=="log" and dbgenable
):
245 logging
.info(dbgtxt
);
247 elif(outtype
=="warning" and dbgenable
):
248 logging
.warning(dbgtxt
);
250 elif(outtype
=="error" and dbgenable
):
251 logging
.error(dbgtxt
);
253 elif(outtype
=="critical" and dbgenable
):
254 logging
.critical(dbgtxt
);
256 elif(outtype
=="exception" and dbgenable
):
257 logging
.exception(dbgtxt
);
259 elif(outtype
=="logalt" and dbgenable
):
260 logging
.log(dgblevel
, dbgtxt
);
262 elif(outtype
=="debug" and dbgenable
):
263 logging
.debug(dbgtxt
);
271 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
272 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
277 def add_url_param(url
, **params
):
279 parts
= list(urlparse
.urlsplit(url
));
280 d
= dict(parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
282 parts
[n
]=urlencode(d
);
283 return urlparse
.urlunsplit(parts
);
# Extend PATH with this script's directory and the current working
# directory so which_exec() below can locate helper executables shipped
# alongside the script.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search os.environ["PATH"] for an executable and return its full path.

    Parameters:
        execfile: bare file name to look for in each PATH directory.

    Returns:
        The first matching path, or None when no PATH entry contains it.

    Fix: the original split PATH on a hard-coded ":" and joined with "/",
    which breaks on Windows (";"-separated PATH, "\\" separator) and is
    inconsistent with the os.pathsep-based PATH extension done at module
    load above.  Use os.pathsep and os.path.join for portability.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
291 def listize(varlist
):
299 newlistreg
.update({ilx
: varlist
[il
]});
300 newlistrev
.update({varlist
[il
]: ilx
});
303 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
306 def twolistize(varlist
):
316 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
317 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
318 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
319 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
322 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
323 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
324 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
327 def arglistize(proexec
, *varlist
):
331 newarglist
= [proexec
];
333 if varlist
[il
][0] is not None:
334 newarglist
.append(varlist
[il
][0]);
335 if varlist
[il
][1] is not None:
336 newarglist
.append(varlist
[il
][1]);
340 def fix_header_names(header_dict
):
341 if(sys
.version
[0]=="2"):
342 header_dict
= {k
.title(): v
for k
, v
in header_dict
.iteritems()};
343 if(sys
.version
[0]>="3"):
344 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
347 # hms_string by ArcGIS Python Recipes
348 # https://arcpy.wordpress.com/2012/04/20/146/
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as "H:MM:SS.ss"."""
    full_hours = int(sec_elapsed / 3600)
    full_minutes = int(sec_elapsed % 3600 / 60)
    leftover_seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(full_hours, full_minutes, leftover_seconds)
355 # get_readable_size by Lipis
356 # http://stackoverflow.com/posts/14998888/revisions
357 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
359 if(unit
!="IEC" and unit
!="SI"):
362 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
363 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
366 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
367 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
372 if abs(bytes
) < unitsize
:
373 strformat
= "%3."+str(precision
)+"f%s";
374 pre_return_val
= (strformat
% (bytes
, unit
));
375 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
376 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
377 alt_return_val
= pre_return_val
.split();
378 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
381 strformat
= "%."+str(precision
)+"f%s";
382 pre_return_val
= (strformat
% (bytes
, "YiB"));
383 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
384 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
385 alt_return_val
= pre_return_val
.split();
386 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
389 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
391 usehashtypes
= usehashtypes
.lower();
392 getfilesize
= os
.path
.getsize(infile
);
393 return_val
= get_readable_size(getfilesize
, precision
, unit
);
395 hashtypelist
= usehashtypes
.split(",");
396 openfile
= open(infile
, "rb");
397 filecontents
= openfile
.read();
400 listnumend
= len(hashtypelist
);
401 while(listnumcount
< listnumend
):
402 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
403 hashtypelistup
= hashtypelistlow
.upper();
404 filehash
= hashlib
.new(hashtypelistup
);
405 filehash
.update(filecontents
);
406 filegethash
= filehash
.hexdigest();
407 return_val
.update({hashtypelistup
: filegethash
});
411 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
413 usehashtypes
= usehashtypes
.lower();
414 getfilesize
= len(instring
);
415 return_val
= get_readable_size(getfilesize
, precision
, unit
);
417 hashtypelist
= usehashtypes
.split(",");
419 listnumend
= len(hashtypelist
);
420 while(listnumcount
< listnumend
):
421 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
422 hashtypelistup
= hashtypelistlow
.upper();
423 filehash
= hashlib
.new(hashtypelistup
);
424 if(sys
.version
[0]=="2"):
425 filehash
.update(instring
);
426 if(sys
.version
[0]>="3"):
427 filehash
.update(instring
.encode('utf-8'));
428 filegethash
= filehash
.hexdigest();
429 return_val
.update({hashtypelistup
: filegethash
});
433 def http_status_to_reason(code
):
436 101: 'Switching Protocols',
441 203: 'Non-Authoritative Information',
443 205: 'Reset Content',
444 206: 'Partial Content',
446 208: 'Already Reported',
448 300: 'Multiple Choices',
449 301: 'Moved Permanently',
454 307: 'Temporary Redirect',
455 308: 'Permanent Redirect',
458 402: 'Payment Required',
461 405: 'Method Not Allowed',
462 406: 'Not Acceptable',
463 407: 'Proxy Authentication Required',
464 408: 'Request Timeout',
467 411: 'Length Required',
468 412: 'Precondition Failed',
469 413: 'Payload Too Large',
471 415: 'Unsupported Media Type',
472 416: 'Range Not Satisfiable',
473 417: 'Expectation Failed',
474 421: 'Misdirected Request',
475 422: 'Unprocessable Entity',
477 424: 'Failed Dependency',
478 426: 'Upgrade Required',
479 428: 'Precondition Required',
480 429: 'Too Many Requests',
481 431: 'Request Header Fields Too Large',
482 451: 'Unavailable For Legal Reasons',
483 500: 'Internal Server Error',
484 501: 'Not Implemented',
486 503: 'Service Unavailable',
487 504: 'Gateway Timeout',
488 505: 'HTTP Version Not Supported',
489 506: 'Variant Also Negotiates',
490 507: 'Insufficient Storage',
491 508: 'Loop Detected',
493 511: 'Network Authentication Required'
495 return reasons
.get(code
, 'Unknown Status Code');
497 def ftp_status_to_reason(code
):
499 110: 'Restart marker reply',
500 120: 'Service ready in nnn minutes',
501 125: 'Data connection already open; transfer starting',
502 150: 'File status okay; about to open data connection',
504 202: 'Command not implemented, superfluous at this site',
505 211: 'System status, or system help reply',
506 212: 'Directory status',
509 215: 'NAME system type',
510 220: 'Service ready for new user',
511 221: 'Service closing control connection',
512 225: 'Data connection open; no transfer in progress',
513 226: 'Closing data connection',
514 227: 'Entering Passive Mode',
515 230: 'User logged in, proceed',
516 250: 'Requested file action okay, completed',
517 257: '"PATHNAME" created',
518 331: 'User name okay, need password',
519 332: 'Need account for login',
520 350: 'Requested file action pending further information',
521 421: 'Service not available, closing control connection',
522 425: 'Can\'t open data connection',
523 426: 'Connection closed; transfer aborted',
524 450: 'Requested file action not taken',
525 451: 'Requested action aborted. Local error in processing',
526 452: 'Requested action not taken. Insufficient storage space in system',
527 500: 'Syntax error, command unrecognized',
528 501: 'Syntax error in parameters or arguments',
529 502: 'Command not implemented',
530 503: 'Bad sequence of commands',
531 504: 'Command not implemented for that parameter',
532 530: 'Not logged in',
533 532: 'Need account for storing files',
534 550: 'Requested action not taken. File unavailable',
535 551: 'Requested action aborted. Page type unknown',
536 552: 'Requested file action aborted. Exceeded storage allocation',
537 553: 'Requested action not taken. File name not allowed'
539 return reasons
.get(code
, 'Unknown Status Code');
541 def sftp_status_to_reason(code
):
545 2: 'SSH_FX_NO_SUCH_FILE',
546 3: 'SSH_FX_PERMISSION_DENIED',
548 5: 'SSH_FX_BAD_MESSAGE',
549 6: 'SSH_FX_NO_CONNECTION',
550 7: 'SSH_FX_CONNECTION_LOST',
551 8: 'SSH_FX_OP_UNSUPPORTED'
553 return reasons
.get(code
, 'Unknown Status Code');
555 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
556 if isinstance(headers
, dict):
558 if(sys
.version
[0]=="2"):
559 for headkey
, headvalue
in headers
.iteritems():
560 returnval
.append((headkey
, headvalue
));
561 if(sys
.version
[0]>="3"):
562 for headkey
, headvalue
in headers
.items():
563 returnval
.append((headkey
, headvalue
));
564 elif isinstance(headers
, list):
570 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
571 if isinstance(headers
, dict):
573 if(sys
.version
[0]=="2"):
574 for headkey
, headvalue
in headers
.iteritems():
575 returnval
.append(headkey
+": "+headvalue
);
576 if(sys
.version
[0]>="3"):
577 for headkey
, headvalue
in headers
.items():
578 returnval
.append(headkey
+": "+headvalue
);
579 elif isinstance(headers
, list):
585 def make_http_headers_from_pycurl_to_dict(headers
):
587 headers
= headers
.strip().split('\r\n');
588 for header
in headers
:
589 parts
= header
.split(': ', 1)
592 header_dict
[key
.title()] = value
;
595 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
596 if isinstance(headers
, list):
601 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
603 elif isinstance(headers
, dict):
609 def get_httplib_support(checkvalue
=None):
610 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
612 returnval
.append("ftp");
613 returnval
.append("httplib");
615 returnval
.append("httplib2");
616 returnval
.append("urllib");
618 returnval
.append("urllib3");
619 returnval
.append("request3");
620 returnval
.append("request");
622 returnval
.append("requests");
624 returnval
.append("aiohttp");
626 returnval
.append("httpx");
627 returnval
.append("httpx2");
629 returnval
.append("mechanize");
631 returnval
.append("pycurl");
632 if(hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
633 returnval
.append("pycurl2");
634 if(hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
635 returnval
.append("pycurl3");
637 returnval
.append("sftp");
639 returnval
.append("pysftp");
640 if(not checkvalue
is None):
641 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
642 checkvalue
= "urllib";
643 if(checkvalue
=="httplib1"):
644 checkvalue
= "httplib";
645 if(checkvalue
in returnval
):
651 def check_httplib_support(checkvalue
="urllib"):
652 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
653 checkvalue
= "urllib";
654 if(checkvalue
=="httplib1"):
655 checkvalue
= "httplib";
656 returnval
= get_httplib_support(checkvalue
);
659 def get_httplib_support_list():
660 returnval
= get_httplib_support(None);
663 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
664 global geturls_download_sleep
, havezstd
, havebrotli
, haveaiohttp
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
666 sleep
= geturls_download_sleep
;
669 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
670 httplibuse
= "urllib";
671 if(httplibuse
=="httplib1"):
672 httplibuse
= "httplib";
673 if(not haverequests
and httplibuse
=="requests"):
674 httplibuse
= "urllib";
675 if(not haveaiohttp
and httplibuse
=="aiohttp"):
676 httplibuse
= "urllib";
677 if(not havehttpx
and httplibuse
=="httpx"):
678 httplibuse
= "urllib";
679 if(not havehttpx
and httplibuse
=="httpx2"):
680 httplibuse
= "urllib";
681 if(not havehttpcore
and httplibuse
=="httpcore"):
682 httplibuse
= "urllib";
683 if(not havehttpcore
and httplibuse
=="httpcore2"):
684 httplibuse
= "urllib";
685 if(not havemechanize
and httplibuse
=="mechanize"):
686 httplibuse
= "urllib";
687 if(not havepycurl
and httplibuse
=="pycurl"):
688 httplibuse
= "urllib";
689 if(not havepycurl
and httplibuse
=="pycurl2"):
690 httplibuse
= "urllib";
691 if(havepycurl
and httplibuse
=="pycurl2" and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
692 httplibuse
= "pycurl";
693 if(not havepycurl
and httplibuse
=="pycurl3"):
694 httplibuse
= "urllib";
695 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
696 httplibuse
= "pycurl2";
697 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
698 httplibuse
= "pycurl";
699 if(not havehttplib2
and httplibuse
=="httplib2"):
700 httplibuse
= "httplib";
701 if(not haveparamiko
and httplibuse
=="sftp"):
703 if(not havepysftp
and httplibuse
=="pysftp"):
705 if(httplibuse
=="urllib" or httplibuse
=="request"):
706 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
707 elif(httplibuse
=="request"):
708 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
709 elif(httplibuse
=="request3"):
710 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
711 elif(httplibuse
=="httplib"):
712 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
713 elif(httplibuse
=="httplib2"):
714 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
715 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
716 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
717 elif(httplibuse
=="requests"):
718 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
719 elif(httplibuse
=="aiohttp"):
720 returnval
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
721 elif(httplibuse
=="httpx"):
722 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
723 elif(httplibuse
=="httpx2"):
724 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
725 elif(httplibuse
=="httpcore"):
726 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
727 elif(httplibuse
=="httpcore2"):
728 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
729 elif(httplibuse
=="mechanize"):
730 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
731 elif(httplibuse
=="pycurl"):
732 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
733 elif(httplibuse
=="pycurl2"):
734 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
735 elif(httplibuse
=="pycurl3"):
736 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
737 elif(httplibuse
=="ftp"):
738 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
739 elif(httplibuse
=="sftp"):
740 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
741 elif(httplibuse
=="pysftp"):
742 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_from_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in httpurl via download_from_url() and return a
    list with one result (backend dict or False) per URL, in input order.

    httpurl may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs; every other argument is passed straight through
    to download_from_url().
    """
    if(isinstance(httpurl, list)):
        pass;
    elif(isinstance(httpurl, tuple)):
        pass;
    elif(isinstance(httpurl, dict)):
        # Fix: dict.values() is a non-indexable view on Python 3, which would
        # break the httpurl[listcount] lookup below; materialize it first.
        httpurl = list(httpurl.values());
    else:
        # A single URL string becomes a one-element list.
        httpurl = [httpurl];
    listsize = len(httpurl);
    listcount = 0;
    returnval = [];
    while(listcount < listsize):
        ouputval = download_from_url(httpurl[listcount], httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize, sleep, timeout);
        returnval.append(ouputval);
        listcount = listcount + 1;
    return returnval;
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a temporary file using the backend named by
    httplibuse and return that backend's result dict, or False on failure.

    Backend aliases ("urllib1", "request", "httplib1", ...) are normalized
    first; a requested backend whose library was not importable at startup
    is silently downgraded (usually to "urllib"). sftp/pysftp cannot be
    emulated, so those return False outright when unavailable.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # --- Normalize aliases, then downgrade unavailable backends. ---
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    # Fix: this guard previously tested haveparamiko; pysftp availability is
    # tracked by havepysftp (matching download_from_url_to_file()).
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    # --- Dispatch to the per-backend implementation. ---
    if(httplibuse=="urllib"):
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        # NOTE: unreachable in practice ("request" is remapped to "urllib"
        # above); kept for parity with the other dispatchers in this file.
        returnval = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3"):
        # NOTE: "request3" can never reach here (caught two branches up), so
        # the redundant "or request3" test was dropped.
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="aiohttp"):
        returnval = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    else:
        returnval = False;
    return returnval;
def download_from_url_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in httpurl to a temporary file via
    download_from_url_file() and return one result per URL, in input order.

    httpurl may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs; every other argument is passed straight through
    to download_from_url_file().
    """
    if(isinstance(httpurl, list)):
        pass;
    elif(isinstance(httpurl, tuple)):
        pass;
    elif(isinstance(httpurl, dict)):
        # Fix: dict.values() is a non-indexable view on Python 3, which would
        # break the httpurl[listcount] lookup below; materialize it first.
        httpurl = list(httpurl.values());
    else:
        # A single URL string becomes a one-element list.
        httpurl = [httpurl];
    listsize = len(httpurl);
    listcount = 0;
    returnval = [];
    while(listcount < listsize):
        ouputval = download_from_url_file(httpurl[listcount], httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize, sleep, timeout);
        returnval.append(ouputval);
        listcount = listcount + 1;
    return returnval;
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile (or return its content when
    outfile is "-") using the backend named by httplibuse; returns the
    backend's result dict, or False on failure.

    Backend aliases are normalized first; a requested backend whose library
    was not importable at startup is silently downgraded (usually to
    "urllib"). sftp/pysftp cannot be emulated, so those return False
    outright when unavailable.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # --- Normalize aliases, then downgrade unavailable backends. ---
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    # --- Dispatch to the per-backend implementation. ---
    if(httplibuse=="urllib"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        # NOTE: unreachable in practice ("request" is remapped to "urllib"
        # above); kept for parity with the other dispatchers in this file.
        returnval = download_from_url_to_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3"):
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="aiohttp"):
        returnval = download_from_url_to_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        # Fix: the httpx/httpx2/httpcore/httpcore2 branches previously
        # omitted outfile and outpath, unlike every other branch here.
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    else:
        returnval = False;
    return returnval;
def download_from_url_to_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download every URL in httpurl via download_from_url_to_file() and
    return one result per URL, in input order.

    httpurl may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs; every other argument is passed straight through
    to download_from_url_to_file().
    """
    if(isinstance(httpurl, list)):
        pass;
    elif(isinstance(httpurl, tuple)):
        pass;
    elif(isinstance(httpurl, dict)):
        # Fix: dict.values() is a non-indexable view on Python 3, which would
        # break the httpurl[listcount] lookup below; materialize it first.
        httpurl = list(httpurl.values());
    else:
        # A single URL string becomes a one-element list.
        httpurl = [httpurl];
    listsize = len(httpurl);
    listcount = 0;
    returnval = [];
    while(listcount < listsize):
        ouputval = download_from_url_to_file(httpurl[listcount], httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, outfile, outpath, ranges, buffersize, sleep, timeout);
        returnval.append(ouputval);
        listcount = listcount + 1;
    return returnval;
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib and return a result dict describing the
    response ('Content', 'Headers', 'Code', ...), or False on URL/socket
    errors.

    Basic-auth credentials embedded in the URL are converted to an
    Authorization header; gzip/deflate/brotli/zstd/lzma/bzip2 response
    bodies are transparently decompressed when the matching library is
    available.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fix: previously called httpuseragent.update(...) — .update on a
            # string raises AttributeError; the header dict is the target.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fix: same wrong-receiver bug as above for the Referer header.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        geturls_request = Request(httpurl);
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(geturls_request);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata);
        else:
            geturls_text = geturls_opener.open(geturls_request);
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry usable headers/body.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    try:
        httpcodereason = geturls_text.reason;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode());
    try:
        httpversionout = geturls_text.version;
    except AttributeError:
        httpversionout = "1.1";
    httpmethodout = geturls_request.get_method();
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    httpheaderout = fix_header_names(httpheaderout);
    if(sys.version[0]=="2"):
        # Python 2 mimetools.Message is not a real dict; rebuild one.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # Rewind before reading the buffered body back out.
        strbuf.seek(0);
        returnval_content = strbuf.read();
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # Fix: this handler previously caught zstandard.error, which never
        # matches lzma failures (and NameErrors when zstandard is absent).
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # Fix: likewise caught zstandard.error; bz2.decompress raises
        # OSError/ValueError on bad data.
        except (OSError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with download_from_url_with_urllib() and spool the body
    into a uniquely-named temporary file; returns a result dict with the
    'Filename', size and timing information, or False on failure.

    The temp-file suffix embeds a SHA-1 of (url, buffersize, start time) so
    concurrent downloads never collide.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        # hashlib on Python 3 only accepts bytes.
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified time onto the temp file.
        # NOTE(review): setting it before writing means the write/close will
        # reset mtime again — confirm whether that matters to callers.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        # Fix: also catch TypeError — parsedate_to_datetime raises it when
        # the Last-Modified header is missing (None).
        except (AttributeError, TypeError):
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (TypeError, ValueError):
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # Fix: elapsed time is end - start; the old start - end produced
    # negative durations in the log and in 'DownloadTime'.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with urllib to outpath/outfile, or — when outfile is
    "-" — return the content in memory; returns a result dict, or False
    when the destination is invalid or the download failed.

    buffersize is a two-element list: [0] is the network read size passed
    down to download_from_url_file_with_urllib(), [1] the local copy size.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # --- Write to a real file: download to a temp file, then move. ---
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Mirror the server's Last-Modified time onto the final file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        # Fix: also catch TypeError — parsedate_to_datetime raises it when
        # the Last-Modified header is missing (None).
        except (AttributeError, TypeError):
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (TypeError, ValueError):
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # Fix: elapsed time is end - start; the old start - end produced
        # negative durations in the log and in 'MoveFileTime'.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # Fix: the dict literal previously had 'Method' twice (upstream
        # method, then httpmethod); only the effective httpmethod is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # --- Return the content in memory instead of keeping a file. ---
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # Fix: elapsed time is end - start (see above).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # Fix: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib (http.client) and return a dict with the
    body and response metadata ('Content', 'Headers', 'Code', 'Reason', ...),
    or False on connection errors / unsupported URL schemes."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUGFIX: the original called httpuseragent.update(...) (a str) when the
        # header was absent; always store the value in httpheaders.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUGFIX: same wrong-object update for the Referer header.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): this opener is never used for the request below; kept only
    # because other backends in this file build it the same way.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # BUGFIX: the original issued a "GET" request here even for POST.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header containers must be copied key-by-key into a dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if(not databytes):
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard against ZeroDivisionError when Content-Length is absent.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffer back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Decode the transfer encoding, best-effort: on a decode error the raw
    # bytes are returned unchanged (preserves the original's behavior).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" (wrong module).
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a uniquely-named temporary file using the httplib
    backend; return a dict describing the file ('Filename', 'Filesize',
    'Headers', ...), or False if the download failed."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    # BUGFIX: this httplib wrapper previously delegated to the urllib backend.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best effort: stamp the file with the server's Last-Modified time.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except (AttributeError, TypeError):
            # Python 2 lacks parsedate_to_datetime; also handles a missing header.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: durations were computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httplib backend into outpath/outfile, or — when
    outfile is "-" — return the content in-memory. buffersize is a pair:
    [download chunk size, file-copy chunk size]. Returns a result dict, or
    False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: carry the server's Last-Modified time onto the file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except (AttributeError, TypeError):
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: durations were computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the original dict listed 'Method' twice; the second key
        # (httpmethod) won, so that effective value is kept here.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Stream to memory: download to a temp file, copy it into a buffer.
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if(not databytes):
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib2's timeout-aware connection classes and
    return a dict with the body and response metadata, or False on connection
    errors / unsupported URL schemes."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUGFIX: the original updated the httpuseragent string itself when the
        # header was absent; always store the value in httpheaders.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUGFIX: same wrong-object update for the Referer header.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): this opener is never used for the request below; kept for
    # symmetry with the other backends.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # BUGFIX: the original issued a "GET" request here even for POST.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header containers must be copied key-by-key into a dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if(not databytes):
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard against ZeroDivisionError when Content-Length is absent.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffer back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Decode the transfer encoding, best-effort (raw bytes kept on error).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" (wrong module).
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"}
    geturls_text.close()
    return returnval
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub: httplib2 is unavailable, so delegate to the urllib backend."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a uniquely-named temporary file using the httplib2
    backend; return a dict describing the file, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best effort: stamp the file with the server's Last-Modified time.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except (AttributeError, TypeError):
            # Python 2 lacks parsedate_to_datetime; also handles a missing header.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: durations were computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub: httplib2 is unavailable, so delegate to the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httplib2 backend into outpath/outfile, or —
    when outfile is "-" — return the content in-memory. buffersize is a pair:
    [download chunk size, file-copy chunk size]. Returns a result dict, or
    False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: carry the server's Last-Modified time onto the file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except (AttributeError, TypeError):
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: durations were computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the original dict listed 'Method' twice; the second key
        # (httpmethod) won, so that effective value is kept here.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Stream to memory: download to a temp file, copy it into a buffer.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if(not databytes):
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub: httplib2 is unavailable, so delegate to the urllib backend."""
        # BUGFIX: the original forwarded buffersize positionally where the
        # target expects outfile (and dropped ranges); keyword arguments make
        # the forwarding order-proof. (Assumes the urllib backend uses the same
        # parameter names as the rest of this file — TODO confirm.)
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: forward everything to the urllib implementation."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: forward everything to the urllib file-download implementation."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend: forward everything to the urllib to-file implementation."""
    # BUGFIX: the original forwarded buffersize positionally where the target
    # expects outfile (and dropped ranges); keyword arguments make the
    # forwarding order-proof. (Assumes the urllib backend uses the same
    # parameter names as the rest of this file — TODO confirm.)
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the requests library (streaming) and return a dict
    with the body and response metadata, or False on connection errors."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUGFIX: the original updated the httpuseragent string itself when the
        # header was absent; always store the value in httpheaders.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUGFIX: same wrong-object update for the Referer header.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    time.sleep(sleep)
    try:
        reqsession = requests.Session()
        if(httpmethod == "GET"):
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        elif(httpmethod == "POST"):
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
        else:
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        # BUGFIX: was requests.exceptions.ConnectError, which does not exist
        # and would raise AttributeError instead of catching the failure.
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    if(geturls_text.raw.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header containers must be copied key-by-key into a dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            # raw.read bypasses requests' automatic content decoding, so the
            # explicit Content-Encoding handling below stays meaningful.
            databytes = geturls_text.raw.read(buffersize)
            if(not databytes):
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard against ZeroDivisionError when Content-Length is absent.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffer back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Decode the transfer encoding, best-effort (raw bytes kept on error).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" (wrong module).
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests library is not installed.

        Delegates the download to the urllib implementation with identical
        arguments and returns its result dict (or False on failure).
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # FIX: propagate the delegated result to the caller (the computed
        # value was otherwise discarded).
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the requests backend and spool the body to a
    uniquely named temporary file.

    Returns a result dict (Type/Filename/Filesize/Headers/Version/Method/
    HeadersSent/URL/Code/Reason/HTTPLib/DownloadTime...) or False when the
    underlying download failed.

    NOTE(review): 'ranges' is accepted for signature parity with the other
    backends but is not used by this implementation.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffersize + start time so concurrent downloads of the same
    # URL get distinct temp-file names.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Prefer RFC 2822 parsing of Last-Modified for the file mtime.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime unavailable (Python 2) or header missing:
            # fall back to strptime on the RFC 1123 date format.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end, which is always
    # negative; report end - start instead.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests library is not installed.

        Delegates to the urllib file-download implementation and returns
        its result dict (or False on failure).
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # FIX: propagate the delegated result to the caller.
        return returnval
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the requests backend and deliver the result
    either to outpath/outfile on disk, or (when outfile == "-") as in-memory
    content.

    buffersize is a two-element list: [download buffer, local-copy buffer].
    Returns a result dict, or False when the target paths are unusable or
    the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when outpath is actually a file or filepath is a
        # directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the original dict literal listed 'Method' twice
        # (pretmpfilename.get('Method') then httpmethod); only one entry kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed (see above).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the requests library is not installed.

        Delegates to the urllib to-file implementation and returns its
        result dict (or False on failure).

        NOTE(review): arguments are forwarded in the original order
        (postdata, buffersize, outfile, outpath) and 'ranges' is dropped —
        confirm this matches download_from_url_to_file_with_urllib's
        parameter order before changing it.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # FIX: propagate the delegated result to the caller.
        return returnval
def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl using aiohttp and return the body plus metadata.

    Returns a dict with Type/Content/Contentsize/Headers/Version/Method/
    HeadersSent/URL/Code/Reason/HTTPLib, or False on connection errors.

    NOTE(review): the session and response objects are used synchronously
    here (no await); confirm this matches how the rest of the file drives
    aiohttp before changing the call style.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-target update as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become an HTTP Basic Authorization
        # header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize)
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata)
        else:
            geturls_text = reqsession.get(httpurl)
    # BUG FIX: aiohttp has no 'exceptions' submodule; the original
    # 'except aiohttp.exceptions.ConnectTimeout/ConnectError' would raise
    # AttributeError when an error actually occurred.
    except aiohttp.ServerTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except aiohttp.ClientConnectionError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    httpversionout = geturls_text.version
    httpmethodout = geturls_text.method
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request_info.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: copy the header mapping into a plain dict key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently undo any Content-Encoding the server applied; on a
    # decode failure the raw bytes are returned unchanged (best effort).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        # BUG FIX: the module exposes ZstdError, not 'error'.
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        # BUG FIX: was 'except zstandard.error' on an lzma call.
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        # BUG FIX: was 'except zstandard.error' on a bz2 call.
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"}
    geturls_text.close()
    return returnval
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is not installed: delegate to the
        urllib implementation and return its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # FIX: propagate the delegated result to the caller.
        return returnval
def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the aiohttp backend and spool the body to a
    uniquely named temporary file.

    Returns a result dict describing the temp file, or False when the
    underlying download failed.

    NOTE(review): 'ranges' is accepted for signature parity with the other
    backends but is not used by this implementation.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffersize + start time so concurrent downloads get
    # distinct temp-file names.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Prefer RFC 2822 parsing of Last-Modified for the file mtime.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is not installed: delegate to the
        urllib file-download implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # FIX: propagate the delegated result to the caller.
        return returnval
def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the aiohttp backend and deliver the result
    either to outpath/outfile on disk, or (when outfile == "-") as
    in-memory content.

    buffersize is a two-element list: [download buffer, local-copy buffer].
    Returns a result dict, or False when the target paths are unusable or
    the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when outpath is a file or filepath is a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: duplicate 'Method' key removed from the dict literal.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed from the dict literal.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when aiohttp is not installed.

        Delegates to the urllib to-file implementation and returns its
        result dict (or False on failure).

        NOTE(review): arguments are forwarded in the original order
        (postdata, buffersize, outfile, outpath) and 'ranges' is dropped —
        confirm this matches download_from_url_to_file_with_urllib's
        parameter order before changing it.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # FIX: propagate the delegated result to the caller.
        return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl using httpx (HTTP/1.1 client) and return the body
    plus metadata.

    Returns a dict with Type/Content/Contentsize/Headers/Version/Method/
    HeadersSent/URL/Code/Reason/HTTPLib, or False on connection errors.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-target update as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become an HTTP Basic Authorization
        # header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
        if(httpmethod=="GET"):
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod=="POST"):
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        # Older httpx versions lack reason_phrase; derive it from the code.
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: copy the header mapping into a plain dict key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # BUG FIX: the original looped on geturls_text.read(), but
        # httpx.Response.read() returns the entire cached body on every
        # call and never becomes empty, so the loop could not terminate.
        # Stream the body chunk-by-chunk via iter_bytes() instead.
        for databytes in geturls_text.iter_bytes():
            if not databytes:
                continue
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # FIX: close the response exactly once (the original called close()
    # both here and again after building the result dict).
    geturls_text.close()
    # Transparently undo any Content-Encoding the server applied; on a
    # decode failure the raw bytes are returned unchanged (best effort).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        # BUG FIX: the module exposes ZstdError, not 'error'.
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        # BUG FIX: was 'except zstandard.error' on an lzma call.
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        # BUG FIX: was 'except zstandard.error' on a bz2 call.
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"}
    return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback delegating to the urllib implementation.

    NOTE(review): by analogy with the other backends this def is expected
    to sit under an 'if(not havehttpx):' guard; that guard line is not
    visible in this chunk — confirm before applying unconditionally.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # FIX: propagate the delegated result to the caller.
    return returnval
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httpx and spool the response body to a temp file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', plus the
    response metadata from the in-memory download) or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time to build a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified onto the temp file; prefer the
            # RFC-compliant parser, fall back to strptime for odd servers.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # FIX: elapsed time is end - start; the original computed start - end,
    # logging and returning a negative duration.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx package is unavailable.

    Delegates the to-temp-file download to the urllib implementation.

    NOTE(review): upstream defines this fallback under an
    `if(not havehttpx):` guard; confirm that guard is intact above this def.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    # FIX: the delegate's result was assigned but never returned.
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httpx into outpath/outfile, or return the content
    in-memory when outfile is "-".

    buffersize is a [download, copy] pair. Returns a result dict
    ('Type': "File" or "Content") or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # FIX: elapsed time is end - start (original logged a negative value).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # FIX: the original dict literal had 'Method' twice; the second value
        # (httpmethod) won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when the httpx package is unavailable.

    Delegates the to-file download to the urllib implementation.

    NOTE(review): upstream defines this fallback under an
    `if(not havehttpx):` guard; confirm that guard is intact above this def.
    """
    # FIX: the original passed (postdata, buffersize, outfile, outpath, sleep,
    # timeout) positionally, misaligning buffersize/outfile/outpath against
    # the to-file signature used elsewhere in this module. Keyword arguments
    # make the call order-independent; ranges is left to the delegate's
    # default pending confirmation of its signature.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders=httpheaders, httpuseragent=httpuseragent, httpreferer=httpreferer, httpcookie=httpcookie, httpmethod=httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep, timeout=timeout);
    # FIX: the delegate's result was assigned but never returned.
    return returnval;
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httpx (HTTP/1 + HTTP/2 client) and return the
    body in-memory.

    Returns a result dict ('Type': "Content", 'Content', 'Contentsize',
    headers/version/method/URL/code metadata, 'HTTPLib': "httpx2") or False
    on connection failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # FIX: the original called httpuseragent.update(...) here, mutating
            # the wrong object (httpuseragent is a string); headers go in
            # httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # FIX: same wrong-object bug as above (was httpuseragent.update).
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # Unknown methods fall back to GET, matching the other backends.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    try:
        httpcodereason = geturls_text.reason_phrase;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code);
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read();
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    geturls_text.close();
    # Transparently decode the advertised Content-Encoding; a failed decode
    # leaves the raw bytes in place (best-effort, as elsewhere in this file).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # FIX: was `except zstandard.error` -- wrong exception type for lzma
        # (and a NameError if zstandard is not installed).
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # FIX: was `except zstandard.error`; bz2.decompress raises
        # OSError/ValueError on bad data.
        except (OSError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"};
    geturls_text.close();
    return returnval;
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx package is unavailable.

    Delegates to the urllib implementation with the same arguments.

    NOTE(review): upstream defines this fallback under an
    `if(not havehttpx):` guard; confirm that guard is intact above this def.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # FIX: the delegate's result was assigned but never returned.
    return returnval;
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpx2 backend and spool the body to a temp file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', plus the
    response metadata from the in-memory download) or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time to build a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # FIX: elapsed time is end - start (original logged a negative value).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx package is unavailable.

    Delegates the to-temp-file download to the urllib implementation.

    NOTE(review): upstream defines this fallback under an
    `if(not havehttpx):` guard; confirm that guard is intact above this def.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    # FIX: the delegate's result was assigned but never returned.
    return returnval;
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx2 backend into outpath/outfile, or return
    the content in-memory when outfile is "-".

    buffersize is a [download, copy] pair. Returns a result dict
    ('Type': "File" or "Content") or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # FIX: elapsed time is end - start (original logged a negative value).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # FIX: duplicate 'Method' key removed; the second value (httpmethod)
        # was the effective one, so only it is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when the httpx package is unavailable.

    Delegates the to-file download to the urllib implementation.

    NOTE(review): upstream defines this fallback under an
    `if(not havehttpx):` guard; confirm that guard is intact above this def.
    """
    # FIX: the original passed (postdata, buffersize, outfile, outpath, sleep,
    # timeout) positionally, misaligning buffersize/outfile/outpath against
    # the to-file signature used elsewhere in this module. Keyword arguments
    # make the call order-independent; ranges is left to the delegate's
    # default pending confirmation of its signature.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders=httpheaders, httpuseragent=httpuseragent, httpreferer=httpreferer, httpcookie=httpcookie, httpmethod=httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep, timeout=timeout);
    # FIX: the delegate's result was assigned but never returned.
    return returnval;
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httpcore (low-level HTTP/1.1 pool) and return the
    body in-memory.

    Returns a result dict ('Type': "Content", ..., 'HTTPLib': "httpcore") or
    False on connection failure.

    NOTE(review): httpcookie and timeout are accepted for signature parity but
    are not passed to httpcore's request calls below (as in the original) --
    confirm whether that is intentional.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # FIX: the original mutated httpuseragent (a string) instead of
            # the headers dict.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # FIX: same wrong-object bug as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        elif(httpmethod=="POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
            # FIX: the original issued the POST branch with method "GET".
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
        else:
            # Unknown methods fall back to GET, matching the other backends.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpcodereason = http_status_to_reason(geturls_text.status);
    # httpcore pool is pinned to HTTP/1; report the version statically.
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read();
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    geturls_text.close();
    # Transparently decode the advertised Content-Encoding (best-effort).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # FIX: was `except zstandard.error` -- wrong exception type for lzma.
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # FIX: was `except zstandard.error`; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"};
    geturls_text.close();
    return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the httpcore package is unavailable.

        Delegates to the urllib implementation with the same arguments.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        # FIX: the delegate's result was assigned but never returned.
        return returnval;
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httpcore and spool the response body to a temp file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', plus the
    response metadata from the in-memory download) or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time to build a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # FIX: elapsed time is end - start (original logged a negative value).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the httpcore package is unavailable.

        Delegates the to-temp-file download to the urllib implementation.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        # FIX: the delegate's result was assigned but never returned.
        return returnval;
2948 def download_from_url_to_file_with_httpcore(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
2949 global geturls_download_sleep
, havezstd
, havebrotli
;
2951 sleep
= geturls_download_sleep
;
2954 if(not outfile
=="-"):
2955 outpath
= outpath
.rstrip(os
.path
.sep
);
2956 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
2957 if(not os
.path
.exists(outpath
)):
2958 os
.makedirs(outpath
);
2959 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
2961 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
2963 pretmpfilename
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2964 if(not pretmpfilename
):
2966 tmpfilename
= pretmpfilename
.get('Filename');
2967 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2969 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
2970 exec_time_start
= time
.time();
2971 shutil
.move(tmpfilename
, filepath
);
2973 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
2974 except AttributeError:
2976 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2981 exec_time_end
= time
.time();
2982 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
2983 if(os
.path
.exists(tmpfilename
)):
2984 os
.remove(tmpfilename
);
2985 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
2987 pretmpfilename
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2988 tmpfilename
= pretmpfilename
.get('Filename');
2989 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2992 exec_time_start
= time
.time();
2993 with
open(tmpfilename
, 'rb') as ft
:
2996 databytes
= ft
.read(buffersize
[1]);
2997 if not databytes
: break;
2998 datasize
= len(databytes
);
2999 fulldatasize
= datasize
+ fulldatasize
;
3002 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3003 downloaddiff
= fulldatasize
- prevdownsize
;
3004 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3005 prevdownsize
= fulldatasize
;
3008 fdata
= f
.getvalue();
3011 os
.remove(tmpfilename
);
3012 exec_time_end
= time
.time();
3013 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
3014 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed.

        Delegates to download_from_url_to_file_with_urllib and returns its
        result.
        """
        # Mutable-default fix: construct per-call defaults instead of sharing
        # module-level lists between invocations.
        if(ranges is None):
            ranges = [None, None]
        if(buffersize is None):
            buffersize = [524288, 524288]
        # NOTE(review): 'ranges' is accepted but not forwarded, and the
        # positional order passed below (buffersize before outfile/outpath)
        # differs from this wrapper's own parameter order -- confirm against
        # download_from_url_to_file_with_urllib's signature.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Bug fix: return the delegated result instead of dropping it.
        return returnval
3023 def download_from_url_with_httpcore2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3024 global geturls_download_sleep
, havezstd
, havebrotli
;
3026 sleep
= geturls_download_sleep
;
3029 urlparts
= urlparse
.urlparse(httpurl
);
3030 if(isinstance(httpheaders
, list)):
3031 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
3032 httpheaders
= fix_header_names(httpheaders
);
3033 if(httpuseragent
is not None):
3034 if('User-Agent' in httpheaders
):
3035 httpheaders
['User-Agent'] = httpuseragent
;
3037 httpuseragent
.update({'User-Agent': httpuseragent
});
3038 if(httpreferer
is not None):
3039 if('Referer' in httpheaders
):
3040 httpheaders
['Referer'] = httpreferer
;
3042 httpuseragent
.update({'Referer': httpreferer
});
3043 if(urlparts
.username
is not None or urlparts
.password
is not None):
3044 if(sys
.version
[0]=="2"):
3045 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
3046 if(sys
.version
[0]>="3"):
3047 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
3048 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
3050 if(postdata
is not None and not isinstance(postdata
, dict)):
3051 postdata
= urlencode(postdata
);
3053 if(httpmethod
=="GET"):
3054 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
3055 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
3056 elif(httpmethod
=="POST"):
3057 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
3058 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
3060 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
3061 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
3062 except httpcore
.ConnectTimeout
:
3063 log
.info("Error With URL "+httpurl
);
3065 except httpcore
.ConnectError
:
3066 log
.info("Error With URL "+httpurl
);
3068 except socket
.timeout
:
3069 log
.info("Error With URL "+httpurl
);
3071 httpcodeout
= geturls_text
.status
;
3072 httpcodereason
= http_status_to_reason(geturls_text
.status
);
3073 httpversionout
= "1.1";
3074 httpmethodout
= httpmethod
;
3075 httpurlout
= str(httpurl
);
3076 httpheaderout
= geturls_text
.headers
;
3077 httpheadersentout
= httpheaders
;
3078 if(isinstance(httpheaderout
, list)):
3079 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
3080 if(sys
.version
[0]=="2"):
3082 prehttpheaderout
= httpheaderout
;
3083 httpheaderkeys
= httpheaderout
.keys();
3084 imax
= len(httpheaderkeys
);
3088 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
3090 except AttributeError:
3092 httpheaderout
= fix_header_names(httpheaderout
);
3093 if(isinstance(httpheadersentout
, list)):
3094 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
3095 httpheadersentout
= fix_header_names(httpheadersentout
);
3096 downloadsize
= httpheaderout
.get('Content-Length');
3097 if(downloadsize
is not None):
3098 downloadsize
= int(downloadsize
);
3099 if downloadsize
is None: downloadsize
= 0;
3102 log
.info("Downloading URL "+httpurl
);
3103 with
BytesIO() as strbuf
:
3105 databytes
= geturls_text
.read();
3106 if not databytes
: break;
3107 datasize
= len(databytes
);
3108 fulldatasize
= datasize
+ fulldatasize
;
3111 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3112 downloaddiff
= fulldatasize
- prevdownsize
;
3113 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3114 prevdownsize
= fulldatasize
;
3115 strbuf
.write(databytes
);
3118 returnval_content
= strbuf
.read();
3119 geturls_text
.close();
3120 if(httpheaderout
.get("Content-Encoding")=="gzip"):
3122 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
3125 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
3127 returnval_content
= zlib
.decompress(returnval_content
);
3130 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
3132 returnval_content
= brotli
.decompress(returnval_content
);
3133 except brotli
.error
:
3135 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
3137 returnval_content
= zstandard
.decompress(returnval_content
);
3138 except zstandard
.error
:
3140 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
3142 returnval_content
= lzma
.decompress(returnval_content
);
3143 except zstandard
.error
:
3145 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
3147 returnval_content
= bz2
.decompress(returnval_content
);
3148 except zstandard
.error
:
3150 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "httpcore2"};
3151 geturls_text
.close();
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed.

        Delegates to download_from_url_with_urllib with all arguments
        forwarded unchanged and returns its result dict.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Bug fix: the delegated result was assigned but never returned.
        return returnval
3160 def download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
3161 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
3162 exec_time_start
= time
.time();
3163 myhash
= hashlib
.new("sha1");
3164 if(sys
.version
[0]=="2"):
3165 myhash
.update(httpurl
);
3166 myhash
.update(str(buffersize
));
3167 myhash
.update(str(exec_time_start
));
3168 if(sys
.version
[0]>="3"):
3169 myhash
.update(httpurl
.encode('utf-8'));
3170 myhash
.update(str(buffersize
).encode('utf-8'));
3171 myhash
.update(str(exec_time_start
).encode('utf-8'));
3172 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
3174 sleep
= geturls_download_sleep
;
3177 pretmpfilename
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
3178 if(not pretmpfilename
):
3180 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
3181 tmpfilename
= f
.name
;
3183 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3184 except AttributeError:
3186 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3191 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3192 f
.write(pretmpfilename
.get('Content'));
3194 exec_time_end
= time
.time();
3195 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
3196 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed.

        Delegates to download_from_url_file_with_urllib with all arguments
        forwarded unchanged and returns its result dict.
        """
        # Mutable-default fix: build the byte-range pair per call.
        if(ranges is None):
            ranges = [None, None]
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Bug fix: return the delegated result instead of dropping it.
        return returnval
3205 def download_from_url_to_file_with_httpcore2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3206 global geturls_download_sleep
, havezstd
, havebrotli
;
3208 sleep
= geturls_download_sleep
;
3211 if(not outfile
=="-"):
3212 outpath
= outpath
.rstrip(os
.path
.sep
);
3213 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
3214 if(not os
.path
.exists(outpath
)):
3215 os
.makedirs(outpath
);
3216 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
3218 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
3220 pretmpfilename
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3221 if(not pretmpfilename
):
3223 tmpfilename
= pretmpfilename
.get('Filename');
3224 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3226 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
3227 exec_time_start
= time
.time();
3228 shutil
.move(tmpfilename
, filepath
);
3230 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3231 except AttributeError:
3233 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3238 exec_time_end
= time
.time();
3239 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
3240 if(os
.path
.exists(tmpfilename
)):
3241 os
.remove(tmpfilename
);
3242 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3244 pretmpfilename
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3245 tmpfilename
= pretmpfilename
.get('Filename');
3246 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3249 exec_time_start
= time
.time();
3250 with
open(tmpfilename
, 'rb') as ft
:
3253 databytes
= ft
.read(buffersize
[1]);
3254 if not databytes
: break;
3255 datasize
= len(databytes
);
3256 fulldatasize
= datasize
+ fulldatasize
;
3259 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3260 downloaddiff
= fulldatasize
- prevdownsize
;
3261 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3262 prevdownsize
= fulldatasize
;
3265 fdata
= f
.getvalue();
3268 os
.remove(tmpfilename
);
3269 exec_time_end
= time
.time();
3270 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
3271 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Delegate download-to-file to the urllib backend.

    Forwards its arguments to download_from_url_to_file_with_urllib and
    returns its result.
    """
    # Mutable-default fix: construct per-call defaults instead of sharing
    # module-level lists between invocations.
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    # NOTE(review): 'ranges' is accepted but not forwarded, and the positional
    # order below (buffersize before outfile/outpath) differs from this
    # wrapper's own parameter order -- confirm against
    # download_from_url_to_file_with_urllib's signature.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    # Bug fix: the delegated result was assigned but never returned.
    return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias of the urllib3-based downloader.

    Forwards all arguments to download_from_url_with_urllib3 and returns its
    result dict.
    """
    returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # Bug fix: the delegated result was assigned but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to download_from_url_with_urllib with all arguments
        forwarded unchanged and returns its result dict.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Bug fix: return the delegated result instead of dropping it.
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias of the urllib3-based file downloader.

    Forwards all arguments to download_from_url_file_with_urllib3 and returns
    its result dict.
    """
    # Mutable-default fix: build the byte-range pair per call.
    if(ranges is None):
        ranges = [None, None]
    returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # Bug fix: the delegated result was assigned but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to download_from_url_file_with_urllib with all arguments
        forwarded unchanged and returns its result dict.
        """
        # Mutable-default fix: build the byte-range pair per call.
        if(ranges is None):
            ranges = [None, None]
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Bug fix: return the delegated result instead of dropping it.
        return returnval
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Alias of the urllib3-based download-to-file helper.

    Forwards its arguments to download_from_url_to_file_with_urllib3 and
    returns its result.
    """
    # Mutable-default fix: construct per-call defaults instead of sharing
    # module-level lists between invocations.
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    # NOTE(review): 'ranges' is accepted but not forwarded, and the positional
    # order below (buffersize before outfile/outpath) differs from this
    # wrapper's own parameter order -- confirm against
    # download_from_url_to_file_with_urllib3's signature.
    returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    # Bug fix: the delegated result was assigned but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to download_from_url_to_file_with_urllib and returns its
        result.
        """
        # Mutable-default fix: construct per-call defaults instead of sharing
        # module-level lists between invocations.
        if(ranges is None):
            ranges = [None, None]
        if(buffersize is None):
            buffersize = [524288, 524288]
        # NOTE(review): 'ranges' is accepted but not forwarded, and the
        # positional order below (buffersize before outfile/outpath) differs
        # from this wrapper's own parameter order -- confirm against
        # download_from_url_to_file_with_urllib's signature.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Bug fix: return the delegated result instead of dropping it.
        return returnval
3310 def download_from_url_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3311 global geturls_download_sleep
, havezstd
, havebrotli
;
3313 sleep
= geturls_download_sleep
;
3316 urlparts
= urlparse
.urlparse(httpurl
);
3317 if(isinstance(httpheaders
, list)):
3318 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
3319 httpheaders
= fix_header_names(httpheaders
);
3320 if(httpuseragent
is not None):
3321 if('User-Agent' in httpheaders
):
3322 httpheaders
['User-Agent'] = httpuseragent
;
3324 httpuseragent
.update({'User-Agent': httpuseragent
});
3325 if(httpreferer
is not None):
3326 if('Referer' in httpheaders
):
3327 httpheaders
['Referer'] = httpreferer
;
3329 httpuseragent
.update({'Referer': httpreferer
});
3330 if(urlparts
.username
is not None or urlparts
.password
is not None):
3331 if(sys
.version
[0]=="2"):
3332 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
3333 if(sys
.version
[0]>="3"):
3334 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
3335 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
3337 timeout
= urllib3
.util
.Timeout(connect
=timeout
, read
=timeout
);
3338 urllib_pool
= urllib3
.PoolManager(headers
=httpheaders
, timeout
=timeout
);
3339 if(postdata
is not None and not isinstance(postdata
, dict)):
3340 postdata
= urlencode(postdata
);
3342 if(httpmethod
=="GET"):
3343 geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
3344 elif(httpmethod
=="POST"):
3345 geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
3347 geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
3348 except urllib3
.exceptions
.ConnectTimeoutError
:
3349 log
.info("Error With URL "+httpurl
);
3351 except urllib3
.exceptions
.ConnectError
:
3352 log
.info("Error With URL "+httpurl
);
3354 except urllib3
.exceptions
.MaxRetryError
:
3355 log
.info("Error With URL "+httpurl
);
3357 except socket
.timeout
:
3358 log
.info("Error With URL "+httpurl
);
3361 log
.info("Error With URL "+httpurl
);
3363 httpcodeout
= geturls_text
.status
;
3364 httpcodereason
= geturls_text
.reason
;
3365 if(geturls_text
.version
=="10"):
3366 httpversionout
= "1.0";
3368 httpversionout
= "1.1";
3369 httpmethodout
= httpmethod
;
3370 httpurlout
= geturls_text
.geturl();
3371 httpheaderout
= geturls_text
.info();
3372 httpheadersentout
= httpheaders
;
3373 if(isinstance(httpheaderout
, list)):
3374 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
3375 if(sys
.version
[0]=="2"):
3377 prehttpheaderout
= httpheaderout
;
3378 httpheaderkeys
= httpheaderout
.keys();
3379 imax
= len(httpheaderkeys
);
3383 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
3385 except AttributeError:
3387 httpheaderout
= fix_header_names(httpheaderout
);
3388 if(isinstance(httpheadersentout
, list)):
3389 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
3390 httpheadersentout
= fix_header_names(httpheadersentout
);
3391 downloadsize
= httpheaderout
.get('Content-Length');
3392 if(downloadsize
is not None):
3393 downloadsize
= int(downloadsize
);
3394 if downloadsize
is None: downloadsize
= 0;
3397 log
.info("Downloading URL "+httpurl
);
3398 with
BytesIO() as strbuf
:
3400 databytes
= geturls_text
.read(buffersize
);
3401 if not databytes
: break;
3402 datasize
= len(databytes
);
3403 fulldatasize
= datasize
+ fulldatasize
;
3406 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3407 downloaddiff
= fulldatasize
- prevdownsize
;
3408 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3409 prevdownsize
= fulldatasize
;
3410 strbuf
.write(databytes
);
3412 returnval_content
= strbuf
.read();
3413 if(httpheaderout
.get("Content-Encoding")=="gzip"):
3415 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
3418 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
3420 returnval_content
= zlib
.decompress(returnval_content
);
3423 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
3425 returnval_content
= brotli
.decompress(returnval_content
);
3426 except brotli
.error
:
3428 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
3430 returnval_content
= zstandard
.decompress(returnval_content
);
3431 except zstandard
.error
:
3433 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
3435 returnval_content
= lzma
.decompress(returnval_content
);
3436 except zstandard
.error
:
3438 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
3440 returnval_content
= bz2
.decompress(returnval_content
);
3441 except zstandard
.error
:
3443 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "urllib3"};
3444 geturls_text
.close();
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to download_from_url_with_urllib with all arguments
        forwarded unchanged and returns its result dict.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Bug fix: the delegated result was assigned but never returned.
        return returnval
3453 def download_from_url_file_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
3454 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
3455 exec_time_start
= time
.time();
3456 myhash
= hashlib
.new("sha1");
3457 if(sys
.version
[0]=="2"):
3458 myhash
.update(httpurl
);
3459 myhash
.update(str(buffersize
));
3460 myhash
.update(str(exec_time_start
));
3461 if(sys
.version
[0]>="3"):
3462 myhash
.update(httpurl
.encode('utf-8'));
3463 myhash
.update(str(buffersize
).encode('utf-8'));
3464 myhash
.update(str(exec_time_start
).encode('utf-8'));
3465 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
3467 sleep
= geturls_download_sleep
;
3470 pretmpfilename
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
3471 if(not pretmpfilename
):
3473 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
3474 tmpfilename
= f
.name
;
3476 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3477 except AttributeError:
3479 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3484 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3485 f
.write(pretmpfilename
.get('Content'));
3487 exec_time_end
= time
.time();
3488 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
3489 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to download_from_url_file_with_urllib with all arguments
        forwarded unchanged and returns its result dict.
        """
        # Mutable-default fix: build the byte-range pair per call.
        if(ranges is None):
            ranges = [None, None]
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Bug fix: return the delegated result instead of dropping it.
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the urllib3 backend, saving to outpath/outfile.

    When outfile is "-" the downloaded bytes are returned in-memory instead
    (a 'Type': "Content" dict); otherwise the temp download is moved into
    place and a 'Type': "File" dict is returned. Returns False on failure.
    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())));
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (ValueError, TypeError):
                pass;
        except (ValueError, TypeError):
            # Header missing or unparsable; keep the current mtime.
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict contained a duplicate 'Method' key; the effective
        # value (httpmethod, last key wins) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    # Guard against ZeroDivisionError on empty downloads.
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
            fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib backend."""
        # BUGFIX: arguments are now forwarded in the callee's declared order
        # (postdata, outfile, outpath, ranges, buffersize, ...); the old call
        # passed buffersize where outfile belongs, dropped ranges entirely,
        # and never returned the result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with mechanize and return a 'Type': "Content" result dict.

    Handles basic-auth URLs, GET/POST, progress logging, and transparent
    decompression of gzip/deflate/brotli/zstd/lzma/bzip2 bodies.
    Returns False on connection failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — a string has no update();
            # the header belongs in the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same mistargeted update() as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = mechanize.Browser();
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_opener.addheaders = httpheaders;
    geturls_opener.set_cookiejar(httpcookie);
    geturls_opener.set_handle_robots(False);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata);
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = geturls_opener.open(httpurl);
    except mechanize.HTTPError as geturls_text_error:
        # HTTP error responses still carry a body/headers; keep the object.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.code;
    httpcodereason = geturls_text.msg;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    reqhead = geturls_opener.request;
    httpheadersentout = reqhead.header_items();
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2 httplib message objects need rebuilding into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparent decompression keyed off Content-Encoding; failures leave
    # the raw bytes in place (best-effort, as elsewhere in this file).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" — wrong module for lzma.
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error" — bz2 raises OSError/ValueError.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"};
    geturls_text.close();
    return returnval;
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to urllib."""
        # BUGFIX: the result was assigned but never returned (callers got None).
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via mechanize into a uniquely named temp file.

    Returns a 'Type': "File" dict pointing at the temp file (caller is
    responsible for moving/removing it), or False on failure.
    NOTE(review): `ranges` is accepted for signature parity with the other
    backends but is not consumed here — confirm against the urllib variant.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # A SHA-1 over url/buffersize/start-time makes the temp-file name unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    # BUGFIX: set the Last-Modified timestamp AFTER writing/closing the file;
    # the old code called os.utime before f.write, so the write clobbered it.
    try:
        lastmod = pretmpfilename.get('Headers').get('Last-Modified');
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())));
    except AttributeError:
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except (ValueError, TypeError):
            pass;
    except (ValueError, TypeError):
        pass;
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to urllib."""
        # BUGFIX: the result was assigned but never returned (callers got None).
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the mechanize backend, saving to outpath/outfile.

    When outfile is "-" the bytes are returned in-memory ('Type': "Content");
    otherwise the temp download is moved into place ('Type': "File").
    Returns False on failure. buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (ValueError, TypeError):
                pass;
        except (ValueError, TypeError):
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: duplicate 'Method' key removed (effective value httpmethod kept).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    # Guard against ZeroDivisionError on empty downloads.
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
            fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: 'HeadersSent' carried the literal list ['HeadersSent'] instead
        # of the headers actually sent; duplicate 'Method' key also removed.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to urllib."""
        # BUGFIX: arguments are now forwarded in the callee's declared order
        # (postdata, outfile, outpath, ranges, buffersize, ...); the old call
        # passed buffersize where outfile belongs, dropped ranges entirely,
        # and never returned the result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with pycurl and return a 'Type': "Content" result dict.

    Handles basic-auth URLs, GET/POST, progress logging, and transparent
    decompression of gzip/deflate/brotli/zstd/lzma/bzip2 bodies.
    Returns False on connection failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — a string has no update();
            # the header belongs in the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same mistargeted update() as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    # NOTE(review): this opener is configured but the request below is made by
    # pycurl, not this opener — it appears vestigial; kept for compatibility.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    retrieved_body = BytesIO();
    retrieved_headers = BytesIO();
    try:
        if(httpmethod=="GET"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        elif(httpmethod=="POST"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.setopt(geturls_text.POST, True);
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        retrieved_headers.seek(0);
        if(sys.version[0]=="2"):
            pycurlhead = retrieved_headers.read();
        if(sys.version[0]>="3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8');
        # First status line carries "HTTP/<ver> <code> [reason]".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
        retrieved_body.seek(0);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except ValueError:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
    httpversionout = pyhttpverinfo[0];
    httpmethodout = httpmethod;
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
    httpheaderout = pycurlheadersout;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        # Python 2 header objects need rebuilding into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparent decompression keyed off Content-Encoding (best-effort).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" — wrong module for lzma.
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error" — bz2 raises OSError/ValueError.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"};
    geturls_text.close();
    return returnval;
# NOTE(review): guard restored to match the sibling fallback definitions
# (if(not haveurllib3)/if(not havemechanize)) — confirm havepycurl is the
# flag set by the pycurl import probe earlier in this file.
if(not havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to urllib."""
        # BUGFIX: the result was assigned but never returned (callers got None).
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via pycurl into a uniquely named temp file.

    Returns a 'Type': "File" dict pointing at the temp file (caller is
    responsible for moving/removing it), or False on failure.
    NOTE(review): `ranges` is accepted for signature parity with the other
    backends but is not consumed here — confirm against the urllib variant.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # A SHA-1 over url/buffersize/start-time makes the temp-file name unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    # BUGFIX: set the Last-Modified timestamp AFTER writing/closing the file;
    # the old code called os.utime before f.write, so the write clobbered it.
    try:
        lastmod = pretmpfilename.get('Headers').get('Last-Modified');
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())));
    except AttributeError:
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except (ValueError, TypeError):
            pass;
    except (ValueError, TypeError):
        pass;
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
# NOTE(review): guard restored to match the sibling fallback definitions
# (if(not haveurllib3)/if(not havemechanize)) — confirm havepycurl is the
# flag set by the pycurl import probe earlier in this file.
if(not havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to urllib."""
        # BUGFIX: the result was assigned but never returned (callers got None).
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the pycurl file backend and place the result.

    When outfile is a name, the downloaded temp file is moved to
    outpath/outfile and a 'File' result dict is returned; when outfile is
    "-", the content is read back into memory and a 'Content' result dict
    is returned.  Returns False when the target path is unusable or the
    download failed.

    BUGFIXES vs. original: the result dicts had a duplicate 'Method' key
    (the first value was dead); elapsed times were computed as
    start - end, yielding negative durations.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when outpath is a file or filepath is a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: stamp the file with the server's Last-Modified time.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime missing (old Python) or header absent.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    else:
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # Copy the temp file back into memory chunk by chunk with progress logs.
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback backend used when pycurl is not installed.

    Delegates to the urllib to-file backend.

    BUGFIX: the original positional call passed buffersize into the
    callee's outfile slot (and shifted outfile/outpath/sleep/timeout into
    the wrong parameters, dropping ranges).  Arguments after postdata are
    now passed by keyword, matching the signature shared by the
    download_from_url_to_file_with_* variants in this module.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl over HTTP/2 and return a result dict
        (Content, Headers, Version, Code, Reason, ...), or False on socket
        timeout / DNS / parse errors.

        BUGFIXES vs. original: the User-Agent/Referer fallbacks called
        .update() on the httpuseragent string instead of the httpheaders
        dict; the lzma and bzip2 decompress branches caught
        zstandard.error instead of their own error types.  The three
        identical GET/POST/other Curl setups are deduplicated.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object.
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object.
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a basic-auth header.
            if(sys.version[0] == "2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0] >= "3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            # One Curl setup for every method; POST only adds two options.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            if(httpmethod == "POST"):
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0] == "2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0] >= "3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            # Status line -> (http version, status code, optional reason).
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0] == "2"):
            # Python 2: rebuild the header mapping as a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently undo Content-Encoding; decode failures keep raw bytes.
        if(httpheaderout.get("Content-Encoding") == "gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:  # BUGFIX: was zstandard.error
                pass
        elif(httpheaderout.get("Content-Encoding") == "bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):  # BUGFIX: was zstandard.error
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback backend used when pycurl is not installed.

    Delegates to the urllib backend with an identical signature.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """pycurl is present but lacks HTTP/2: fall back to plain pycurl.

        CONSISTENCY FIX: this wrapper previously delegated to the urllib
        backend even though pycurl is available; the file/to_file pycurl2
        fallbacks under the same guard in this module delegate to the
        pycurl (HTTP/1.x) backend, so this one now does too.
        """
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via the HTTP/2 pycurl backend into a uniquely
        named temporary file and return a 'File' result dict, or False on
        failure.  The ranges parameter is accepted for signature parity
        with the other backends but is not used here.

        BUGFIX vs. original: DownloadTime and the log message used
        start - end, yielding negative elapsed times.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1")
        if(sys.version[0] == "2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0] >= "3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # Best effort: stamp the file with the server's Last-Modified time.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        # Refresh Filesize with the on-disk size now that the content is written.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback backend used when pycurl is not installed.

    Delegates to the urllib file backend with an identical signature.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """pycurl is present but lacks HTTP/2: fall back to the plain
        pycurl (HTTP/1.x) file backend with an identical signature."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via the HTTP/2 pycurl file backend and place
        the result.  outfile "-" returns the content in memory; any other
        name moves the temp file to outpath/outfile.  Returns a result
        dict, or False when the target path is unusable or the download
        failed.

        BUGFIXES vs. original: duplicate 'Method' key in the result dicts;
        elapsed times were computed as start - end (negative).
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        if(not outfile == "-"):
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            # Refuse to write when outpath is a file or filepath is a directory.
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Best effort: stamp the file with the server's Last-Modified time.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        else:
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            # Copy the temp file back into memory chunk by chunk with progress logs.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback backend used when pycurl is not installed.

    Delegates to the urllib to-file backend.

    BUGFIX: the original positional call passed buffersize into the
    callee's outfile slot (and shifted the remaining arguments, dropping
    ranges).  Arguments after postdata are now passed by keyword.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """pycurl is present but lacks HTTP/2: fall back to the plain
        pycurl (HTTP/1.x) to-file backend.

        BUGFIX: the original positional call passed buffersize into the
        callee's outfile slot (and shifted the remaining arguments,
        dropping ranges).  Arguments after postdata are now passed by
        keyword.
        """
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl over HTTP/3 and return a result dict
        (Content, Headers, Version, Code, Reason, ...), or False on socket
        timeout / DNS / parse errors.

        BUGFIXES vs. original: the User-Agent/Referer fallbacks called
        .update() on the httpuseragent string instead of the httpheaders
        dict; the lzma and bzip2 decompress branches caught
        zstandard.error instead of their own error types.  The three
        identical GET/POST/other Curl setups are deduplicated.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object.
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object.
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a basic-auth header.
            if(sys.version[0] == "2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0] >= "3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            # One Curl setup for every method; POST only adds two options.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            if(httpmethod == "POST"):
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0] == "2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0] >= "3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            # Status line -> (http version, status code, optional reason).
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0] == "2"):
            # Python 2: rebuild the header mapping as a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently undo Content-Encoding; decode failures keep raw bytes.
        if(httpheaderout.get("Content-Encoding") == "gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:  # BUGFIX: was zstandard.error
                pass
        elif(httpheaderout.get("Content-Encoding") == "bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):  # BUGFIX: was zstandard.error
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback backend used when pycurl is not installed.

    Delegates to the urllib backend with an identical signature.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """pycurl supports HTTP/2 but not HTTP/3: fall back to the
        pycurl2 (HTTP/2) backend with an identical signature."""
        return download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """pycurl supports neither HTTP/3 nor HTTP/2: fall back to the
        plain pycurl (HTTP/1.x) backend with an identical signature."""
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
# NOTE(review): interior lines (guards, try:, returns) were missing from the
# damaged source; reconstructed from the identical FTP/SFTP siblings in this
# file — verify against upstream before merging.
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with the HTTP/3 pycurl backend into a persistent
        temporary file; return a result dict describing the file, or False."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Mirror the server's Last-Modified onto the temp file when present.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        # BUGFIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
        return returnval;
# NOTE(review): guard and return reconstructed (dropped in damaged source).
if(not havepycurl):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib file backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl lacks HTTP/3 but has HTTP/2: delegate to the pycurl2 file backend."""
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: missing return (dropped in damaged source).
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl has neither HTTP/3 nor HTTP/2: delegate to the plain pycurl file backend."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: missing return (dropped in damaged source).
        return returnval;
# NOTE(review): interior lines were missing from the damaged source;
# reconstructed from the identical FTP/SFTP siblings — verify upstream.
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via the HTTP/3 pycurl backend.

        outfile=="-" returns the content in memory; otherwise the download is
        moved to outpath/outfile. Returns a result dict, or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Carry the server's Last-Modified over to the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            # BUGFIX: elapsed time was start - end (always negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUGFIX: dict had a duplicate 'Method' key; last value wins, so
            # only the effective 'Method': httpmethod entry is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                        downloaddiff = fulldatasize - prevdownsize;
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0, 0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
# NOTE(review): guard and return reconstructed (dropped in damaged source).
if(not havepycurl):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib to-file backend."""
        # BUGFIX: arguments were passed positionally in the wrong order
        # (buffersize landed in the outfile slot); now aligned with the
        # to-file signature (..., postdata, outfile, outpath, ranges, buffersize, ...).
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # BUGFIX: this fallback both defined and called *_with_pycurl2, making it
    # uselessly self-recursive and leaving *_with_pycurl3 undefined. Renamed to
    # *_with_pycurl3 delegating to *_with_pycurl2, matching every sibling
    # fallback in this file. Argument order also fixed (outfile/buffersize).
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl lacks HTTP/3 but has HTTP/2: delegate to the pycurl2 to-file backend."""
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    # BUGFIX: this fallback both defined and called *_with_pycurl (infinite
    # recursion) and never defined *_with_pycurl3. Renamed to *_with_pycurl3
    # delegating to the plain pycurl backend, matching the sibling fallbacks.
    # Argument order also fixed (outfile/buffersize).
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl has neither HTTP/3 nor HTTP/2: delegate to the plain pycurl to-file backend."""
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval;
# NOTE(review): dropped lines (FTP construction, port default, returns,
# prot_p, close/seek) reconstructed from the sibling upload helper — verify.
def download_file_from_ftp_file(url):
    """Fetch url via FTP/FTPS and return its contents as a rewound BytesIO,
    or False on unsupported scheme / connection failure."""
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # BUGFIX: logged undefined name 'httpurl'; this function's parameter is 'url'.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(urlparts.username, urlparts.password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def download_file_from_ftp_string(url):
    """Fetch url via FTP/FTPS and return the raw bytes."""
    buffered = download_file_from_ftp_file(url)
    return buffered.read();
# NOTE(review): dropped guard/return/seek lines reconstructed from the SFTP
# sibling in this file — verify upstream.
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over FTP/FTPS and return a result dict with the content,
    or False on failure. Header arguments are normalized but unused by FTP."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: updated httpuseragent (a string) instead of httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_ftp_file(httpurl);
    if(not geturls_text):
        return False;
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # Rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0, 0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
# NOTE(review): dropped lines reconstructed from the identical SFTP sibling.
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over FTP/FTPS into a persistent temporary file and
    return a result dict describing it, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # FTP results carry no headers ('Headers': None); utime attempts fall
        # through to the AttributeError/ValueError handlers.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: elapsed time was start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
# NOTE(review): dropped lines reconstructed from the identical pycurl/SFTP siblings.
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over FTP/FTPS. outfile=="-" returns content in memory;
    otherwise the file is moved to outpath/outfile. Returns a dict or False."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: elapsed time was start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: dict had a duplicate 'Method' key; the later None won, so the
        # single effective entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0, 0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
# NOTE(review): dropped lines (FTP construction, port default, returns,
# prot_p, close) reconstructed from the download sibling — verify upstream.
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to url via FTP/FTPS; return the
    (rewound) file object, or False on unsupported scheme / connect failure."""
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # BUGFIX: logged undefined name 'httpurl'; this function's parameter is 'url'.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(urlparts.username, urlparts.password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
    """Upload raw bytes ftpstring to url via FTP/FTPS; return the uploaded
    file object (or False), as produced by upload_file_to_ftp_file."""
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    ftpfileo.close();
    # BUGFIX: missing return (dropped in damaged source) — caller got None.
    return ftpfile;
# NOTE(review): dropped lines (port default, returns, close) reconstructed —
# verify upstream.
def download_file_from_sftp_file(url):
    """Fetch url via SFTP (paramiko) and return its contents as a rewound
    BytesIO, or False on unsupported scheme / connection failure."""
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: logged undefined name 'httpurl'; this function's parameter is 'url'.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp = ssh.open_sftp();
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;
# NOTE(review): guard and body reconstructed (dropped in damaged source);
# without the guard this stub would clobber the real paramiko implementation.
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        """Stub fallback when paramiko is absent: SFTP downloads unsupported."""
        return False;
def download_file_from_sftp_string(url):
    """Fetch url via SFTP and return the raw bytes."""
    buffered = download_file_from_sftp_file(url)
    return buffered.read();
# NOTE(review): guard and body reconstructed (dropped in damaged source).
# BUGFIX (copy-paste): this no-paramiko stub was named
# download_file_from_ftp_string, which would clobber the working FTP helper
# defined earlier; per the sibling fallback pattern it must stub the SFTP
# string helper instead.
if(not haveparamiko):
    def download_file_from_sftp_string(url):
        """Stub fallback when paramiko is absent: SFTP downloads unsupported."""
        return False;
# NOTE(review): dropped guard/return/seek lines reconstructed from the FTP
# sibling in this file — verify upstream.
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP and return a result dict with the content,
    or False on failure. Header arguments are normalized but unused by SFTP."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: updated httpuseragent (a string) instead of httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_sftp_file(httpurl);
    if(not geturls_text):
        return False;
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # Rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0, 0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stub fallback when paramiko is absent: SFTP downloads unsupported."""
        # Body reconstructed: the damaged source dropped the return.
        return False;
# NOTE(review): dropped lines reconstructed from the identical FTP sibling.
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP into a persistent temporary file and return
    a result dict describing it, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # SFTP results carry no headers ('Headers': None); utime attempts fall
        # through to the AttributeError/ValueError handlers.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: elapsed time was start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stub fallback when paramiko is absent: SFTP downloads unsupported."""
        # Body reconstructed: the damaged source dropped the return.
        return False;
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP (paramiko).

    When outfile != "-", move the downloaded temp file to outpath/outfile and
    return a 'File' result dict; when outfile == "-", copy the temp file into
    memory and return a 'Content' result dict.  Returns False on failure.
    buffersize is [download_chunk, copy_chunk]; sleep/timeout < 0 fall back to
    module defaults.
    BUGFIX: the returned dicts previously contained a duplicate 'Method' key
    ('Method': ..., 'Method': None) so the real request method was always
    clobbered with None; the duplicate has been removed.
    NOTE(review): some control-flow lines (early returns, loop/buffer setup)
    were lost in extraction and are reconstructed from this file's repeated
    download_from_url_to_file_with_* template — confirm against upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Download to a real file under outpath.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            # shutil.move normally removes the source; clean up if it remains.
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Download to memory: copy the temp file into a BytesIO, then delete it.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        prevdownsize = int(0)
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback defined when paramiko is unavailable: SFTP downloads are
        unsupported, so always return False.
        NOTE(review): body reconstructed — original stub body lost in extraction."""
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path of an sftp:// url via
    paramiko, returning the rewound file object, or False on failure.

    BUGFIX: the error-logging calls referenced an undefined name ``httpurl``
    (the parameter is ``url``), which raised NameError inside the except
    handlers; they now log ``url``.
    NOTE(review): early returns / default assignments lost in extraction were
    reconstructed from this file's matching pysftp upload helper — confirm."""
    urlparts = urlparse.urlparse(url)
    sftp_port = urlparts.port
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    # NOTE(review): sftp_username/sftp_password are computed but the connect
    # call below passes urlparts.username/password directly, as in the
    # original — confirm whether the computed credentials should be used.
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback defined when paramiko is unavailable: SFTP uploads are
    unsupported, so always return False.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the bytes ``sftpstring`` to an sftp:// url and return the
    uploaded (rewound) file object, or False on failure.

    BUGFIX: the original called nonexistent ``upload_file_to_sftp_files`` with
    the undefined name ``ftpfileo`` (NameError at runtime); it now calls
    upload_file_to_sftp_file(sftpfileo, url)."""
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    return sftpfile
def upload_file_to_sftp_string(sftpstring, url):
    """Fallback defined when paramiko is unavailable: always return False.
    BUGFIX: the original fallback took only ``(url)`` while the real variant
    takes ``(sftpstring, url)``, so existing callers would raise TypeError;
    the signature now matches the real implementation."""
    return False
def download_file_from_pysftp_file(url):
    """Download the path of an sftp:// url via pysftp into an in-memory
    BytesIO and return the rewound buffer, or False on failure.

    BUGFIX: the original discarded the ``pysftp.Connection(...)`` result and
    then did ``sftp = ssh.open_sftp()`` — ``ssh`` is undefined in this
    function (NameError); the connection is now assigned to ``sftp`` and used
    directly.  Also fixed ``httpurl`` (undefined; parameter is ``url``) in the
    error-logging calls.
    NOTE(review): early returns / default assignments lost in extraction were
    reconstructed from the file's repeated SFTP-helper template — confirm."""
    urlparts = urlparse.urlparse(url)
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    # NOTE(review): computed credentials are not passed to Connection below,
    # mirroring the original — confirm intended.
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    try:
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_pysftp_file(url):
    """Fallback defined when pysftp is unavailable: always return False.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def download_file_from_pysftp_string(url):
    """Fetch the remote file at *url* over SFTP and return its raw bytes."""
    remote_buffer = download_file_from_pysftp_file(url)
    return remote_buffer.read()
def download_file_from_ftp_string(url):
    """Fallback stub: always return False.
    NOTE(review): this fallback sits in the pysftp section right after the
    real download_file_from_pysftp_string, so the name looks like a
    copy-paste error (it shadows the FTP helper instead of the pysftp one) —
    confirm against upstream before renaming.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) into memory and return a 'Content'
    result dict, or False on failure.  sleep < 0 falls back to the module
    default; header arguments are normalized but not used by SFTP itself.

    BUGFIX: the content buffer is rewound (strbuf.seek(0, 0)) before it is
    read back — without it strbuf.read() after the write loop returns empty
    bytes and 'Content' would always be b''.
    NOTE(review): missing control-flow lines reconstructed from the file's
    repeated download_from_url_with_* template — confirm against upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_pysftp_file(httpurl)
    if(not geturls_text):
        return False
    # SFTP provides no Content-Length header equivalent here; size is unknown.
    downloadsize = None
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = int(0)
    prevdownsize = int(0)
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0, 0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when pysftp is unavailable: always return False.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) to a uniquely named temporary file
    and return a 'File' result dict, or False on failure.

    BUGFIX: the call to download_from_url_with_pysftp passed the undefined
    names ``httpuseragent`` and ``httpreferer`` (not parameters of this
    function, and not accepted by the pysftp variant's signature) — a
    guaranteed NameError; the extra arguments have been removed.
    NOTE(review): lines lost in extraction (sleep/timeout guards, early
    return, inner try, f.close) reconstructed from the file's repeated
    download_from_url_file_with_* template — confirm against upstream."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url/buffersize/start-time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Propagate the remote Last-Modified time onto the temp file.
            # pysftp results carry Headers=None, so the .get chain raises
            # AttributeError and the timestamps are left untouched.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (AttributeError, ValueError):
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when pysftp is unavailable: always return False.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp).

    When outfile != "-", move the downloaded temp file to outpath/outfile and
    return a 'File' result dict; when outfile == "-", copy the temp file into
    memory and return a 'Content' result dict.  Returns False on failure.
    buffersize is [download_chunk, copy_chunk].
    BUGFIX: the returned dicts previously contained a duplicate 'Method' key
    ('Method': ..., 'Method': None) so the real request method was always
    clobbered with None; the duplicate has been removed.
    NOTE(review): missing control-flow lines reconstructed from this file's
    repeated download_from_url_to_file_with_* template — confirm upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Download to a real file under outpath.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            # shutil.move normally removes the source; clean up if it remains.
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Download to memory: copy the temp file into a BytesIO, then delete it.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        prevdownsize = int(0)
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback defined when pysftp is unavailable: always return False.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path of an sftp:// url via
    pysftp, returning the rewound file object, or False on failure.

    BUGFIX: the original discarded the ``pysftp.Connection(...)`` result and
    then did ``sftp = ssh.open_sftp()`` — ``ssh`` is undefined here
    (NameError); the connection is now assigned to ``sftp`` and used directly.
    Also fixed ``httpurl`` (undefined; parameter is ``url``) in the
    error-logging calls.
    NOTE(review): early returns / default assignments lost in extraction were
    reconstructed from the file's repeated SFTP-helper template — confirm."""
    urlparts = urlparse.urlparse(url)
    sftp_port = urlparts.port
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    # NOTE(review): computed credentials are not passed to Connection below,
    # mirroring the original — confirm intended.
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    try:
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_pysftp_file(sftpfile, url):
    """Fallback defined when pysftp is unavailable: always return False.
    NOTE(review): body reconstructed — original stub body lost in extraction."""
    return False
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the bytes ``sftpstring`` to an sftp:// url and return the
    uploaded (rewound) file object, or False on failure.

    BUGFIX: the original called nonexistent ``upload_file_to_pysftp_files``
    with the undefined name ``ftpfileo`` (NameError at runtime); it now calls
    upload_file_to_pysftp_file(sftpfileo, url)."""
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    return sftpfile
5534 def upload_file_to_pysftp_string(url
):