4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
24 from cgi
import parse_qsl
;
26 from urlparse
import parse_qsl
;
27 except ModuleNotFoundError
:
28 from urlparse
import parse_qsl
;
29 except (DeprecationWarning, TypeError):
30 from urlparse
import parse_qsl
;
37 havemechanize
= False;
42 havemechanize
= False;
70 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
110 if(sys
.version
[0]=="2"):
112 from io
import StringIO
, BytesIO
;
115 from cStringIO
import StringIO
;
116 from cStringIO
import StringIO
as BytesIO
;
118 from StringIO
import StringIO
;
119 from StringIO
import StringIO
as BytesIO
;
120 # From http://python-future.org/compatible_idioms.html
121 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
122 from urllib
import urlencode
;
123 from urllib
import urlopen
as urlopenalt
;
124 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
125 import urlparse
, cookielib
;
126 from httplib
import HTTPConnection
, HTTPSConnection
;
127 if(sys
.version
[0]>="3"):
128 from io
import StringIO
, BytesIO
;
129 # From http://python-future.org/compatible_idioms.html
130 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
131 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
132 from urllib
.error
import HTTPError
, URLError
;
133 import urllib
.parse
as urlparse
;
134 import http
.cookiejar
as cookielib
;
135 from http
.client
import HTTPConnection
, HTTPSConnection
;
137 __program_name__
= "PyWWW-Get";
138 __program_alt_name__
= "PyWWWGet";
139 __program_small_name__
= "wwwget";
140 __project__
= __program_name__
;
141 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
142 __version_info__
= (2, 0, 2, "RC 1", 1);
143 __version_date_info__
= (2023, 10, 5, "RC 1", 1);
144 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
145 __revision__
= __version_info__
[3];
146 __revision_id__
= "$Id$";
147 if(__version_info__
[4] is not None):
148 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
149 if(__version_info__
[4] is None):
150 __version_date_plusrc__
= __version_date__
;
151 if(__version_info__
[3] is not None):
152 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
153 if(__version_info__
[3] is None):
154 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
156 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
158 pytempdir
= tempfile
.gettempdir();
160 PyBitness
= platform
.architecture();
161 if(PyBitness
=="32bit" or PyBitness
=="32"):
163 elif(PyBitness
=="64bit" or PyBitness
=="64"):
168 compression_supported_list
= ['identity', 'gzip', 'deflate', 'bzip2'];
170 compression_supported_list
.append('br');
172 compression_supported_list
.append('zstd');
174 compression_supported_list
.append('lzma');
175 compression_supported_list
.append('xz');
176 compression_supported
= ', '.join(compression_supported_list
);
178 geturls_cj
= cookielib
.CookieJar();
179 windowsNT4_ua_string
= "Windows NT 4.0";
180 windowsNT4_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "4.0.0"};
181 windows2k_ua_string
= "Windows NT 5.0";
182 windows2k_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.0.0"};
183 windowsXP_ua_string
= "Windows NT 5.1";
184 windowsXP_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.1.0"};
185 windowsXP64_ua_string
= "Windows NT 5.2; Win64; x64";
186 windowsXP64_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "5.1.0"};
187 windows7_ua_string
= "Windows NT 6.1; Win64; x64";
188 windows7_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.1.0"};
189 windows8_ua_string
= "Windows NT 6.2; Win64; x64";
190 windows8_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.2.0"};
191 windows81_ua_string
= "Windows NT 6.3; Win64; x64";
192 windows81_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.3.0"};
193 windows10_ua_string
= "Windows NT 10.0; Win64; x64";
194 windows10_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "10.0.0"};
195 windows11_ua_string
= "Windows NT 11.0; Win64; x64";
196 windows11_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "11.0.0"};
197 geturls_ua_firefox_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:109.0) Gecko/20100101 Firefox/117.0";
198 geturls_ua_seamonkey_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
199 geturls_ua_chrome_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
200 geturls_ua_chromium_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
201 geturls_ua_palemoon_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
202 geturls_ua_opera_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
203 geturls_ua_vivaldi_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
204 geturls_ua_internet_explorer_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; Trident/7.0; rv:11.0) like Gecko";
205 geturls_ua_microsoft_edge_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
206 geturls_ua_pywwwget_python
= "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname
=__project__
, prover
=__version__
, prourl
=__project_url__
);
207 if(platform
.python_implementation()!=""):
208 py_implementation
= platform
.python_implementation();
209 if(platform
.python_implementation()==""):
210 py_implementation
= "Python";
211 geturls_ua_pywwwget_python_alt
= "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver
=platform
.system()+" "+platform
.release(), archtype
=platform
.machine(), prourl
=__project_url__
, pyimp
=py_implementation
, pyver
=platform
.python_version(), proname
=__project__
, prover
=__version__
);
212 geturls_ua_googlebot_google
= "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
213 geturls_ua_googlebot_google_old
= "Googlebot/2.1 (+http://www.google.com/bot.html)";
214 geturls_ua
= geturls_ua_firefox_windows7
;
215 geturls_headers_firefox_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
216 geturls_headers_seamonkey_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
217 geturls_headers_chrome_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
218 geturls_headers_chrome_windows7
.update(windows7_ua_addon
);
219 geturls_headers_chromium_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
220 geturls_headers_chromium_windows7
.update(windows7_ua_addon
);
221 geturls_headers_palemoon_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
222 geturls_headers_opera_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
223 geturls_headers_opera_windows7
.update(windows7_ua_addon
);
224 geturls_headers_vivaldi_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
225 geturls_headers_vivaldi_windows7
.update(windows7_ua_addon
);
226 geturls_headers_internet_explorer_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
227 geturls_headers_microsoft_edge_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
228 geturls_headers_microsoft_edge_windows7
.update(windows7_ua_addon
);
229 geturls_headers_pywwwget_python
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__
+"\";v=\""+str(__version__
)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation
+"\";v=\""+str(platform
.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__
), 'SEC-CH-UA-PLATFORM': ""+py_implementation
+"", 'SEC-CH-UA-ARCH': ""+platform
.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__
), 'SEC-CH-UA-BITNESS': str(PyBitness
)};
230 geturls_headers_pywwwget_python_alt
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__
+"\";v=\""+str(__version__
)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation
+"\";v=\""+str(platform
.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__
), 'SEC-CH-UA-PLATFORM': ""+py_implementation
+"", 'SEC-CH-UA-ARCH': ""+platform
.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__
), 'SEC-CH-UA-BITNESS': str(PyBitness
)};
231 geturls_headers_googlebot_google
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
232 geturls_headers_googlebot_google_old
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
233 geturls_headers
= geturls_headers_firefox_windows7
;
234 geturls_download_sleep
= 0;
236 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
237 if(outtype
=="print" and dbgenable
):
240 elif(outtype
=="log" and dbgenable
):
241 logging
.info(dbgtxt
);
243 elif(outtype
=="warning" and dbgenable
):
244 logging
.warning(dbgtxt
);
246 elif(outtype
=="error" and dbgenable
):
247 logging
.error(dbgtxt
);
249 elif(outtype
=="critical" and dbgenable
):
250 logging
.critical(dbgtxt
);
252 elif(outtype
=="exception" and dbgenable
):
253 logging
.exception(dbgtxt
);
255 elif(outtype
=="logalt" and dbgenable
):
256 logging
.log(dgblevel
, dbgtxt
);
258 elif(outtype
=="debug" and dbgenable
):
259 logging
.debug(dbgtxt
);
267 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
268 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
273 def add_url_param(url
, **params
):
275 parts
= list(urlparse
.urlsplit(url
));
276 d
= dict(parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
278 parts
[n
]=urlencode(d
);
279 return urlparse
.urlunsplit(parts
);
281 os
.environ
["PATH"] = os
.environ
["PATH"] + os
.pathsep
+ os
.path
.dirname(os
.path
.realpath(__file__
)) + os
.pathsep
+ os
.getcwd();
282 def which_exec(execfile):
283 for path
in os
.environ
["PATH"].split(":"):
284 if os
.path
.exists(path
+ "/" + execfile):
285 return path
+ "/" + execfile;
287 def listize(varlist
):
295 newlistreg
.update({ilx
: varlist
[il
]});
296 newlistrev
.update({varlist
[il
]: ilx
});
299 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
302 def twolistize(varlist
):
312 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
313 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
314 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
315 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
318 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
319 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
320 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
323 def arglistize(proexec
, *varlist
):
327 newarglist
= [proexec
];
329 if varlist
[il
][0] is not None:
330 newarglist
.append(varlist
[il
][0]);
331 if varlist
[il
][1] is not None:
332 newarglist
.append(varlist
[il
][1]);
336 def fix_header_names(header_dict
):
337 if(sys
.version
[0]=="2"):
338 header_dict
= {k
.title(): v
for k
, v
in header_dict
.iteritems()};
339 if(sys
.version
[0]>="3"):
340 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
343 # hms_string by ArcGIS Python Recipes
344 # https://arcpy.wordpress.com/2012/04/20/146/
345 def hms_string(sec_elapsed
):
346 h
= int(sec_elapsed
/ (60 * 60));
347 m
= int((sec_elapsed
% (60 * 60)) / 60);
348 s
= sec_elapsed
% 60.0;
349 return "{}:{:>02}:{:>05.2f}".format(h
, m
, s
);
351 # get_readable_size by Lipis
352 # http://stackoverflow.com/posts/14998888/revisions
353 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
355 if(unit
!="IEC" and unit
!="SI"):
358 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
359 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
362 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
363 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
368 if abs(bytes
) < unitsize
:
369 strformat
= "%3."+str(precision
)+"f%s";
370 pre_return_val
= (strformat
% (bytes
, unit
));
371 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
372 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
373 alt_return_val
= pre_return_val
.split();
374 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
377 strformat
= "%."+str(precision
)+"f%s";
378 pre_return_val
= (strformat
% (bytes
, "YiB"));
379 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
380 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
381 alt_return_val
= pre_return_val
.split();
382 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
385 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
387 usehashtypes
= usehashtypes
.lower();
388 getfilesize
= os
.path
.getsize(infile
);
389 return_val
= get_readable_size(getfilesize
, precision
, unit
);
391 hashtypelist
= usehashtypes
.split(",");
392 openfile
= open(infile
, "rb");
393 filecontents
= openfile
.read();
396 listnumend
= len(hashtypelist
);
397 while(listnumcount
< listnumend
):
398 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
399 hashtypelistup
= hashtypelistlow
.upper();
400 filehash
= hashlib
.new(hashtypelistup
);
401 filehash
.update(filecontents
);
402 filegethash
= filehash
.hexdigest();
403 return_val
.update({hashtypelistup
: filegethash
});
407 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
409 usehashtypes
= usehashtypes
.lower();
410 getfilesize
= len(instring
);
411 return_val
= get_readable_size(getfilesize
, precision
, unit
);
413 hashtypelist
= usehashtypes
.split(",");
415 listnumend
= len(hashtypelist
);
416 while(listnumcount
< listnumend
):
417 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
418 hashtypelistup
= hashtypelistlow
.upper();
419 filehash
= hashlib
.new(hashtypelistup
);
420 if(sys
.version
[0]=="2"):
421 filehash
.update(instring
);
422 if(sys
.version
[0]>="3"):
423 filehash
.update(instring
.encode('utf-8'));
424 filegethash
= filehash
.hexdigest();
425 return_val
.update({hashtypelistup
: filegethash
});
429 def http_status_to_reason(code
):
432 101: 'Switching Protocols',
437 203: 'Non-Authoritative Information',
439 205: 'Reset Content',
440 206: 'Partial Content',
442 208: 'Already Reported',
444 300: 'Multiple Choices',
445 301: 'Moved Permanently',
450 307: 'Temporary Redirect',
451 308: 'Permanent Redirect',
454 402: 'Payment Required',
457 405: 'Method Not Allowed',
458 406: 'Not Acceptable',
459 407: 'Proxy Authentication Required',
460 408: 'Request Timeout',
463 411: 'Length Required',
464 412: 'Precondition Failed',
465 413: 'Payload Too Large',
467 415: 'Unsupported Media Type',
468 416: 'Range Not Satisfiable',
469 417: 'Expectation Failed',
470 421: 'Misdirected Request',
471 422: 'Unprocessable Entity',
473 424: 'Failed Dependency',
474 426: 'Upgrade Required',
475 428: 'Precondition Required',
476 429: 'Too Many Requests',
477 431: 'Request Header Fields Too Large',
478 451: 'Unavailable For Legal Reasons',
479 500: 'Internal Server Error',
480 501: 'Not Implemented',
482 503: 'Service Unavailable',
483 504: 'Gateway Timeout',
484 505: 'HTTP Version Not Supported',
485 506: 'Variant Also Negotiates',
486 507: 'Insufficient Storage',
487 508: 'Loop Detected',
489 511: 'Network Authentication Required'
491 return reasons
.get(code
, 'Unknown Status Code');
493 def ftp_status_to_reason(code
):
495 110: 'Restart marker reply',
496 120: 'Service ready in nnn minutes',
497 125: 'Data connection already open; transfer starting',
498 150: 'File status okay; about to open data connection',
500 202: 'Command not implemented, superfluous at this site',
501 211: 'System status, or system help reply',
502 212: 'Directory status',
505 215: 'NAME system type',
506 220: 'Service ready for new user',
507 221: 'Service closing control connection',
508 225: 'Data connection open; no transfer in progress',
509 226: 'Closing data connection',
510 227: 'Entering Passive Mode',
511 230: 'User logged in, proceed',
512 250: 'Requested file action okay, completed',
513 257: '"PATHNAME" created',
514 331: 'User name okay, need password',
515 332: 'Need account for login',
516 350: 'Requested file action pending further information',
517 421: 'Service not available, closing control connection',
518 425: 'Can\'t open data connection',
519 426: 'Connection closed; transfer aborted',
520 450: 'Requested file action not taken',
521 451: 'Requested action aborted. Local error in processing',
522 452: 'Requested action not taken. Insufficient storage space in system',
523 500: 'Syntax error, command unrecognized',
524 501: 'Syntax error in parameters or arguments',
525 502: 'Command not implemented',
526 503: 'Bad sequence of commands',
527 504: 'Command not implemented for that parameter',
528 530: 'Not logged in',
529 532: 'Need account for storing files',
530 550: 'Requested action not taken. File unavailable',
531 551: 'Requested action aborted. Page type unknown',
532 552: 'Requested file action aborted. Exceeded storage allocation',
533 553: 'Requested action not taken. File name not allowed'
535 return reasons
.get(code
, 'Unknown Status Code');
537 def sftp_status_to_reason(code
):
541 2: 'SSH_FX_NO_SUCH_FILE',
542 3: 'SSH_FX_PERMISSION_DENIED',
544 5: 'SSH_FX_BAD_MESSAGE',
545 6: 'SSH_FX_NO_CONNECTION',
546 7: 'SSH_FX_CONNECTION_LOST',
547 8: 'SSH_FX_OP_UNSUPPORTED'
549 return reasons
.get(code
, 'Unknown Status Code');
551 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
552 if isinstance(headers
, dict):
554 if(sys
.version
[0]=="2"):
555 for headkey
, headvalue
in headers
.iteritems():
556 returnval
.append((headkey
, headvalue
));
557 if(sys
.version
[0]>="3"):
558 for headkey
, headvalue
in headers
.items():
559 returnval
.append((headkey
, headvalue
));
560 elif isinstance(headers
, list):
566 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
567 if isinstance(headers
, dict):
569 if(sys
.version
[0]=="2"):
570 for headkey
, headvalue
in headers
.iteritems():
571 returnval
.append(headkey
+": "+headvalue
);
572 if(sys
.version
[0]>="3"):
573 for headkey
, headvalue
in headers
.items():
574 returnval
.append(headkey
+": "+headvalue
);
575 elif isinstance(headers
, list):
581 def make_http_headers_from_pycurl_to_dict(headers
):
583 headers
= headers
.strip().split('\r\n');
584 for header
in headers
:
585 parts
= header
.split(': ', 1)
588 header_dict
[key
.title()] = value
;
591 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
592 if isinstance(headers
, list):
597 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
599 elif isinstance(headers
, dict):
605 def get_httplib_support(checkvalue
=None):
606 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
608 returnval
.append("ftp");
609 returnval
.append("httplib");
611 returnval
.append("httplib2");
612 returnval
.append("urllib");
614 returnval
.append("urllib3");
615 returnval
.append("request3");
616 returnval
.append("request");
618 returnval
.append("requests");
620 returnval
.append("aiohttp");
622 returnval
.append("httpx");
623 returnval
.append("httpx2");
625 returnval
.append("mechanize");
627 returnval
.append("pycurl");
628 if(hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
629 returnval
.append("pycurl2");
630 if(hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
631 returnval
.append("pycurl3");
633 returnval
.append("sftp");
635 returnval
.append("pysftp");
636 if(not checkvalue
is None):
637 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
638 checkvalue
= "urllib";
639 if(checkvalue
=="httplib1"):
640 checkvalue
= "httplib";
641 if(checkvalue
in returnval
):
647 def check_httplib_support(checkvalue
="urllib"):
648 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
649 checkvalue
= "urllib";
650 if(checkvalue
=="httplib1"):
651 checkvalue
= "httplib";
652 returnval
= get_httplib_support(checkvalue
);
655 def get_httplib_support_list():
656 returnval
= get_httplib_support(None);
659 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
660 global geturls_download_sleep
, havezstd
, havebrotli
, haveaiohttp
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
662 sleep
= geturls_download_sleep
;
665 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
666 httplibuse
= "urllib";
667 if(httplibuse
=="httplib1"):
668 httplibuse
= "httplib";
669 if(not haverequests
and httplibuse
=="requests"):
670 httplibuse
= "urllib";
671 if(not haveaiohttp
and httplibuse
=="aiohttp"):
672 httplibuse
= "urllib";
673 if(not havehttpx
and httplibuse
=="httpx"):
674 httplibuse
= "urllib";
675 if(not havehttpx
and httplibuse
=="httpx2"):
676 httplibuse
= "urllib";
677 if(not havehttpcore
and httplibuse
=="httpcore"):
678 httplibuse
= "urllib";
679 if(not havehttpcore
and httplibuse
=="httpcore2"):
680 httplibuse
= "urllib";
681 if(not havemechanize
and httplibuse
=="mechanize"):
682 httplibuse
= "urllib";
683 if(not havepycurl
and httplibuse
=="pycurl"):
684 httplibuse
= "urllib";
685 if(not havepycurl
and httplibuse
=="pycurl2"):
686 httplibuse
= "urllib";
687 if(havepycurl
and httplibuse
=="pycurl2" and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
688 httplibuse
= "pycurl";
689 if(not havepycurl
and httplibuse
=="pycurl3"):
690 httplibuse
= "urllib";
691 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
692 httplibuse
= "pycurl2";
693 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
694 httplibuse
= "pycurl";
695 if(not havehttplib2
and httplibuse
=="httplib2"):
696 httplibuse
= "httplib";
697 if(not haveparamiko
and httplibuse
=="sftp"):
699 if(not havepysftp
and httplibuse
=="pysftp"):
701 if(httplibuse
=="urllib" or httplibuse
=="request"):
702 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
703 elif(httplibuse
=="request"):
704 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
705 elif(httplibuse
=="request3"):
706 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
707 elif(httplibuse
=="httplib"):
708 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
709 elif(httplibuse
=="httplib2"):
710 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
711 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
712 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
713 elif(httplibuse
=="requests"):
714 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
715 elif(httplibuse
=="aiohttp"):
716 returnval
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
717 elif(httplibuse
=="httpx"):
718 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
719 elif(httplibuse
=="httpx2"):
720 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
721 elif(httplibuse
=="httpcore"):
722 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
723 elif(httplibuse
=="httpcore2"):
724 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
725 elif(httplibuse
=="mechanize"):
726 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
727 elif(httplibuse
=="pycurl"):
728 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
729 elif(httplibuse
=="pycurl2"):
730 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
731 elif(httplibuse
=="pycurl3"):
732 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
733 elif(httplibuse
=="ftp"):
734 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
735 elif(httplibuse
=="sftp"):
736 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
737 elif(httplibuse
=="pysftp"):
738 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_from_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in httpurl (list/tuple/dict of URLs, or a single
    URL string) via download_from_url and return the list of result dicts.

    All remaining parameters are passed through unchanged to
    download_from_url for each URL."""
    if(isinstance(httpurl, list)):
        pass;
    elif(isinstance(httpurl, tuple)):
        pass;
    elif(isinstance(httpurl, dict)):
        # BUGFIX: on Python 3 dict.values() is a non-indexable view, which
        # broke the httpurl[listcount] lookup below; materialize it.
        httpurl = list(httpurl.values());
    else:
        httpurl = [httpurl];
    listsize = len(httpurl);
    listcount = 0;
    returnval = [];
    while(listcount < listsize):
        outputval = download_from_url(httpurl[listcount], httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize, sleep, timeout);
        returnval.append(outputval);
        listcount += 1;
    return returnval;
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl into a temporary file using the backend named by
    httplibuse, falling back to urllib/httplib/ftp when the requested
    backend's module is not installed.

    Returns the backend's result dict, or False for an unknown backend."""
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # Collapse backend aliases onto their canonical names.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Degrade gracefully when an optional backend module is missing.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    # BUGFIX: this test previously used haveparamiko; pysftp availability is
    # tracked by havepysftp (matches download_from_url_to_file).
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    # Dispatch table replaces the old if/elif chain, which contained
    # unreachable duplicate "request" and "request3" branches (both names
    # are already matched by earlier branches before they were reached).
    backend_handlers = {
        "urllib": download_from_url_file_with_urllib,
        "request": download_from_url_file_with_urllib,
        "request3": download_from_url_file_with_request3,
        "httplib": download_from_url_file_with_httplib,
        "httplib2": download_from_url_file_with_httplib2,
        "urllib3": download_from_url_file_with_urllib3,
        "requests": download_from_url_file_with_requests,
        "aiohttp": download_from_url_file_with_aiohttp,
        "httpx": download_from_url_file_with_httpx,
        "httpx2": download_from_url_file_with_httpx2,
        "httpcore": download_from_url_file_with_httpcore,
        "httpcore2": download_from_url_file_with_httpcore2,
        "mechanize": download_from_url_file_with_mechanize,
        "pycurl": download_from_url_file_with_pycurl,
        "pycurl2": download_from_url_file_with_pycurl2,
        "pycurl3": download_from_url_file_with_pycurl3,
        "ftp": download_from_url_file_with_ftp,
        "sftp": download_from_url_file_with_sftp,
        "pysftp": download_from_url_file_with_pysftp,
    };
    handler = backend_handlers.get(httplibuse);
    if(handler is None):
        return False;
    return handler(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in httpurl (list/tuple/dict of URLs, or a single
    URL string) via download_from_url_file and return the list of result
    dicts, one per URL, in input order."""
    if(isinstance(httpurl, list)):
        pass;
    elif(isinstance(httpurl, tuple)):
        pass;
    elif(isinstance(httpurl, dict)):
        # BUGFIX: on Python 3 dict.values() is a non-indexable view, which
        # broke the httpurl[listcount] lookup below; materialize it.
        httpurl = list(httpurl.values());
    else:
        httpurl = [httpurl];
    listsize = len(httpurl);
    listcount = 0;
    returnval = [];
    while(listcount < listsize):
        outputval = download_from_url_file(httpurl[listcount], httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize, sleep, timeout);
        returnval.append(outputval);
        listcount += 1;
    return returnval;
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile (or return the content in memory
    when outfile is "-") using the backend named by httplibuse, falling back
    to urllib/httplib/ftp when the requested backend is not installed.

    Returns the backend's result dict, or False for an unknown backend."""
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # Collapse backend aliases onto their canonical names.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Degrade gracefully when an optional backend module is missing.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    # Dispatch table replaces the old if/elif chain.  BUGFIX: the httpx,
    # httpx2, httpcore and httpcore2 branches previously omitted outfile
    # and outpath, shifting every later argument by two positions (ranges
    # arrived in the callee's outfile slot); all backends now receive the
    # same full argument list, matching every other branch.
    # NOTE(review): assumes the *_with_httpx/httpcore callees accept the
    # same (.., outfile, outpath, ranges, ..) signature as their siblings.
    backend_handlers = {
        "urllib": download_from_url_to_file_with_urllib,
        "request": download_from_url_to_file_with_urllib,
        "request3": download_from_url_to_file_with_request3,
        "httplib": download_from_url_to_file_with_httplib,
        "httplib2": download_from_url_to_file_with_httplib2,
        "urllib3": download_from_url_to_file_with_urllib3,
        "requests": download_from_url_to_file_with_requests,
        "aiohttp": download_from_url_to_file_with_aiohttp,
        "httpx": download_from_url_to_file_with_httpx,
        "httpx2": download_from_url_to_file_with_httpx2,
        "httpcore": download_from_url_to_file_with_httpcore,
        "httpcore2": download_from_url_to_file_with_httpcore2,
        "mechanize": download_from_url_to_file_with_mechanize,
        "pycurl": download_from_url_to_file_with_pycurl,
        "pycurl2": download_from_url_to_file_with_pycurl2,
        "pycurl3": download_from_url_to_file_with_pycurl3,
        "ftp": download_from_url_to_file_with_ftp,
        "sftp": download_from_url_to_file_with_sftp,
        "pysftp": download_from_url_to_file_with_pysftp,
    };
    handler = backend_handlers.get(httplibuse);
    if(handler is None):
        return False;
    return handler(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download every URL in httpurl (list/tuple/dict of URLs, or a single
    URL string) via download_from_url_to_file and return the list of result
    dicts, one per URL, in input order."""
    if(isinstance(httpurl, list)):
        pass;
    elif(isinstance(httpurl, tuple)):
        pass;
    elif(isinstance(httpurl, dict)):
        # BUGFIX: on Python 3 dict.values() is a non-indexable view, which
        # broke the httpurl[listcount] lookup below; materialize it.
        httpurl = list(httpurl.values());
    else:
        httpurl = [httpurl];
    listsize = len(httpurl);
    listcount = 0;
    returnval = [];
    while(listcount < listsize):
        outputval = download_from_url_to_file(httpurl[listcount], httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, outfile, outpath, ranges, buffersize, sleep, timeout);
        returnval.append(outputval);
        listcount += 1;
    return returnval;
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib and return a result dict (keys: Type,
    Content, Contentsize, Headers, Version, Method, HeadersSent, URL,
    Code, Reason, HTTPLib), or False on URL/timeout errors.

    httpheaders may be a list or dict; httpcookie is a cookie jar for
    HTTPCookieProcessor; postdata is urlencoded unless it is a dict."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: previously called .update() on httpuseragent (a
            # string) instead of the header dict -> AttributeError.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above -- update the header dict, not a string.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Turn URL-embedded credentials into a Basic auth header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        geturls_request = Request(httpurl);
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(geturls_request);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata);
        else:
            geturls_text = geturls_opener.open(geturls_request);
    except HTTPError as geturls_text_error:
        # An HTTP error response still carries usable headers and a body.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    try:
        httpcodereason = geturls_text.reason;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode());
    try:
        httpversionout = geturls_text.version;
    except AttributeError:
        httpversionout = "1.1";
    httpmethodout = geturls_request.get_method();
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    httpheaderout = fix_header_names(httpheaderout);
    if(sys.version[0]=="2"):
        # Python 2's message object is not a plain dict; copy it key by key
        # so later .get() calls behave uniformly on both versions.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard against ZeroDivisionError when Content-Length is absent.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently undo any Content-Encoding the server applied.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: previously caught zstandard.error here -- the wrong
            # module's exception (NameError when zstandard is not installed).
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUGFIX: previously caught zstandard.error; bz2 raises
            # OSError/ValueError on bad data.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via urllib into a uniquely-named temporary file.

    Returns a result dict with the temp file name, its size and the HTTP
    metadata from download_from_url_with_urllib, or False when the
    underlying download failed.  The caller is responsible for removing
    the temporary file."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffer size + start time into a unique tmp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        # BUGFIX: write the content BEFORE stamping the timestamps; the old
        # order set utime first and the write then reset mtime to "now".
        f.write(pretmpfilename.get('Content'));
        f.flush();
        try:
            # Mirror the server's Last-Modified time onto the tmp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime missing (old Python) or header absent;
            # fall back to strptime on the RFC 1123 date format.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (TypeError, ValueError):
                pass;
        except (TypeError, ValueError):
            pass;
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via urllib to outpath/outfile; when outfile is "-"
    the content is returned in memory instead of written to disk.

    Returns a result dict describing the file or content, or False on
    failure (download error, or outpath/filepath colliding with an
    existing file/directory of the wrong kind)."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # --- write to a real file under outpath ---
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Mirror the server's Last-Modified time onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (TypeError, ValueError):
                pass;
        except (TypeError, ValueError):
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict literal previously listed 'Method' twice; only
        # the later value (httpmethod) survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    else:
        # --- outfile == "-": copy the temp file into memory and return it ---
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                # Guard against ZeroDivisionError on an empty file.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib/http.client.

    Returns a dict with keys 'Type', 'Content', 'Contentsize',
    'ContentsizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent',
    'URL', 'Code', 'Reason', 'HTTPLib', or False on connection failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        # FIX: the old else-branch called httpuseragent.update(...) on a
        # string; the override belongs in the headers dict.
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        # FIX: same .update-on-a-string mistake for the Referer override.
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif urlparts[0] == "https":
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "POST":
            # FIX: the POST branch previously sent "GET" as the request verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # FIX: http.client reports version as the int 10 or 11; comparing against
    # the string "10" was always False, so "1.0" was never reported.
    httpversionout = "1.0" if geturls_text.version == 10 else "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Decode any transfer compression advertised by the server.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # FIX: was except zstandard.error, the wrong module's exception.
            pass
    elif httpheaderout.get("Content-Encoding") == "bzip2":
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, IOError, ValueError):
            # FIX: was except zstandard.error; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib into a uniquely named temp file.

    Returns the usual result dict with 'Type': "File" and 'Filename'
    pointing at the temp file, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from url/buffersize/start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    # FIX: this httplib variant previously delegated to
    # download_from_url_with_urllib, silently using the wrong backend.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    # FIX: the timestamp was previously set BEFORE the content was written,
    # so the write/close clobbered it; set it after the file is closed.
    lastmodified = pretmpfilename.get('Headers').get('Last-Modified')
    if lastmodified is not None:
        # ROBUSTNESS: a missing Last-Modified header used to crash here.
        try:
            mtime = time.mktime(email.utils.parsedate_to_datetime(lastmodified).timetuple())
            os.utime(tmpfilename, (mtime, mtime))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; parse by hand.
            try:
                mtime = time.mktime(datetime.datetime.strptime(lastmodified, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (mtime, mtime))
            except ValueError:
                pass
        except (TypeError, ValueError):
            pass
    exec_time_end = time.time()
    # FIX: duration was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httplib to outpath/outfile, or, when outfile is
    "-", return the content in memory.

    Returns a result dict ('Type': "File" or "Content") or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        lastmodified = pretmpfilename.get('Headers').get('Last-Modified')
        if lastmodified is not None:
            # ROBUSTNESS: a missing Last-Modified header used to crash here.
            try:
                mtime = time.mktime(email.utils.parsedate_to_datetime(lastmodified).timetuple())
                os.utime(filepath, (mtime, mtime))
            except AttributeError:
                try:
                    mtime = time.mktime(datetime.datetime.strptime(lastmodified, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                    os.utime(filepath, (mtime, mtime))
                except ValueError:
                    pass
            except (TypeError, ValueError):
                pass
        exec_time_end = time.time()
        # FIX: duration was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # FIX: the dict previously listed 'Method' twice; the duplicate
        # ('Method': httpmethod) silently overwrote the real response method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
    # outfile == "-": read the temp file back into memory and return it.
    pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if not pretmpfilename:
        return False
    tmpfilename = pretmpfilename.get('Filename')
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with open(tmpfilename, 'rb') as ft:
        f = BytesIO()
        while True:
            databytes = ft.read(buffersize[1])
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    # FIX: duration was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    # FIX: duplicate 'Method' key removed here as well.
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib2's timeout-aware connection classes.

    Returns a dict with keys 'Type', 'Content', 'Contentsize',
    'ContentsizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent',
    'URL', 'Code', 'Reason', 'HTTPLib', or False on connection failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        # FIX: the old else-branch called httpuseragent.update(...) on a
        # string; the override belongs in the headers dict.
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        # FIX: same .update-on-a-string mistake for the Referer override.
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout)
    elif urlparts[0] == "https":
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout)
    else:
        return False
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "POST":
            # FIX: the POST branch previously sent "GET" as the request verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # FIX: the response version is the int 10 or 11; comparing against the
    # string "10" was always False, so "1.0" was never reported.
    httpversionout = "1.0" if geturls_text.version == 10 else "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Decode any transfer compression advertised by the server.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # FIX: was except zstandard.error, the wrong module's exception.
            pass
    elif httpheaderout.get("Content-Encoding") == "bzip2":
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, IOError, ValueError):
            # FIX: was except zstandard.error; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"}
    geturls_text.close()
    return returnval
if not havehttplib2:
    # httplib2 is unavailable: transparently fall back to the urllib backend
    # with the identical signature.
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback: delegate to download_from_url_with_urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib2 into a uniquely named temp file.

    Returns the usual result dict with 'Type': "File" and 'Filename'
    pointing at the temp file, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from url/buffersize/start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    # FIX: the timestamp was previously set BEFORE the content was written,
    # so the write/close clobbered it; set it after the file is closed.
    lastmodified = pretmpfilename.get('Headers').get('Last-Modified')
    if lastmodified is not None:
        # ROBUSTNESS: a missing Last-Modified header used to crash here.
        try:
            mtime = time.mktime(email.utils.parsedate_to_datetime(lastmodified).timetuple())
            os.utime(tmpfilename, (mtime, mtime))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; parse by hand.
            try:
                mtime = time.mktime(datetime.datetime.strptime(lastmodified, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (mtime, mtime))
            except ValueError:
                pass
        except (TypeError, ValueError):
            pass
    exec_time_end = time.time()
    # FIX: duration was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not havehttplib2:
    # httplib2 is unavailable: transparently fall back to the urllib backend
    # with the identical signature.
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback: delegate to download_from_url_file_with_urllib."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httplib2 to outpath/outfile, or, when outfile is
    "-", return the content in memory.

    Returns a result dict ('Type': "File" or "Content") or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        lastmodified = pretmpfilename.get('Headers').get('Last-Modified')
        if lastmodified is not None:
            # ROBUSTNESS: a missing Last-Modified header used to crash here.
            try:
                mtime = time.mktime(email.utils.parsedate_to_datetime(lastmodified).timetuple())
                os.utime(filepath, (mtime, mtime))
            except AttributeError:
                try:
                    mtime = time.mktime(datetime.datetime.strptime(lastmodified, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                    os.utime(filepath, (mtime, mtime))
                except ValueError:
                    pass
            except (TypeError, ValueError):
                pass
        exec_time_end = time.time()
        # FIX: duration was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # FIX: the dict previously listed 'Method' twice; the duplicate
        # ('Method': httpmethod) silently overwrote the real response method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
    # outfile == "-": read the temp file back into memory and return it.
    pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if not pretmpfilename:
        return False
    tmpfilename = pretmpfilename.get('Filename')
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with open(tmpfilename, 'rb') as ft:
        f = BytesIO()
        while True:
            databytes = ft.read(buffersize[1])
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    # FIX: duration was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    # FIX: duplicate 'Method' key removed here as well.
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if not havehttplib2:
    # httplib2 is unavailable: transparently fall back to the urllib backend.
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback: delegate to download_from_url_to_file_with_urllib.

        FIX: the delegate call previously passed buffersize before
        outfile/outpath and dropped ranges entirely, scrambling every
        positional argument after postdata.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: delegate to download_from_url_with_urllib."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: delegate to download_from_url_file_with_urllib."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend: delegate to download_from_url_to_file_with_urllib.

    FIX: the delegate call previously passed buffersize before
    outfile/outpath and dropped ranges entirely, scrambling every
    positional argument after postdata.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the requests library (streaming).

    Returns a dict with keys 'Type', 'Content', 'Contentsize',
    'ContentsizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent',
    'URL', 'Code', 'Reason', 'HTTPLib', or False on connection failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        # FIX: the old else-branch called httpuseragent.update(...) on a
        # string; the override belongs in the headers dict.
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        # FIX: same .update-on-a-string mistake for the Referer override.
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        reqsession = requests.Session()
        # FIX: the timeout parameter was accepted but never forwarded.
        if httpmethod == "POST":
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
        else:
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        # FIX: requests.exceptions.ConnectError does not exist; referencing it
        # raised AttributeError instead of handling the failure.
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    # FIX: urllib3 reports raw.version as the int 10 or 11; comparing against
    # the string "10" was always False, so "1.0" was never reported.
    httpversionout = "1.0" if geturls_text.raw.version == 10 else "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.raw.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Decode any transfer compression advertised by the server.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # FIX: was except zstandard.error, the wrong module's exception.
            pass
    elif httpheaderout.get("Content-Encoding") == "bzip2":
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, IOError, ValueError):
            # FIX: was except zstandard.error; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests module is unavailable.

        Delegates to download_from_url_with_urllib() with identical arguments
        and returns its result dict (or False on failure).
        """
        # BUG FIX: the delegated result was assigned but never returned,
        # so this fallback always returned None.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_requests() and spool the
    body into a uniquely named temporary file.

    Returns the standard result dict with 'Type': "File" and 'Filename' set
    to the temp-file path, or False when the underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # SHA-1 of URL + buffer size + start time makes the temp-file suffix unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Preserve the server's Last-Modified timestamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Python 2 email.utils lacks parsedate_to_datetime; parse manually.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests module is unavailable.

        Delegates to download_from_url_file_with_urllib() with identical
        arguments and returns its result dict (or False on failure).
        """
        # BUG FIX: the delegated result was assigned but never returned.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with requests and either move the result to
    outpath/outfile, or (when outfile == "-") return the body as content.

    Returns a result dict ('Type': "File" or "Content") or False on failure.
    NOTE(review): buffersize is [download_chunk, copy_chunk]; the list default
    is shared across calls but is never mutated here.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to write when the target path is unusable.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the dict literal had 'Method' twice; only the later
        # httpmethod value survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the requests module is unavailable.

        Delegates to download_from_url_to_file_with_urllib() and returns its
        result dict (or False on failure).
        NOTE(review): the delegate is called positionally as in the original
        (buffersize before outfile, ranges not forwarded) — verify against
        download_from_url_to_file_with_urllib()'s signature.
        """
        # BUG FIX: the delegated result was assigned but never returned.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
        return returnval;
def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with aiohttp and return the standard result dict
    ('Type': "Content", body, headers, status, …) or False on failure.

    NOTE(review): aiohttp's ClientSession methods return coroutines; this
    synchronous usage mirrors the original code and likely needs an event
    loop to actually work — confirm against callers.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUG FIX: was httpuseragent.update({...}) — a str has no
            # .update(); the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUG FIX: same misdirected .update() as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline userinfo in the URL becomes an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize);
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl);
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata);
        else:
            geturls_text = reqsession.get(httpurl);
    except aiohttp.ClientError:
        # BUG FIX: aiohttp has no "exceptions" submodule (ConnectTimeout /
        # ConnectError are requests names); ClientError is aiohttp's
        # documented base class for client failures.
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    httpversionout = geturls_text.version;
    httpmethodout = geturls_text.method;
    httpurlout = geturls_text.url;
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request_info.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently undo any Content-Encoding the server applied; on a bad
    # stream keep the raw bytes (best-effort, matching the original intent).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.ZstdError:
            # BUG FIX: zstandard raises ZstdError; "zstandard.error" does not exist.
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUG FIX: was "except zstandard.error" (copy-paste from the zstd branch).
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUG FIX: was "except zstandard.error"; bz2.decompress raises
            # OSError/ValueError on bad data.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"};
    geturls_text.close();
    return returnval;
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the aiohttp module is unavailable.

        Delegates to download_from_url_with_urllib() with identical arguments
        and returns its result dict (or False on failure).
        """
        # BUG FIX: the delegated result was assigned but never returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_aiohttp() and spool the
    body into a uniquely named temporary file.

    Returns the standard result dict with 'Type': "File" and 'Filename' set
    to the temp-file path, or False when the underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # SHA-1 of URL + buffer size + start time makes the temp-file suffix unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Preserve the server's Last-Modified timestamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Python 2 email.utils lacks parsedate_to_datetime; parse manually.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the aiohttp module is unavailable.

        Delegates to download_from_url_file_with_urllib() with identical
        arguments and returns its result dict (or False on failure).
        """
        # BUG FIX: the delegated result was assigned but never returned.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with aiohttp and either move the result to
    outpath/outfile, or (when outfile == "-") return the body as content.

    Returns a result dict ('Type': "File" or "Content") or False on failure.
    NOTE(review): buffersize is [download_chunk, copy_chunk]; the list default
    is shared across calls but is never mutated here.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to write when the target path is unusable.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the dict literal had 'Method' twice; only the later
        # httpmethod value survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the aiohttp module is unavailable.

        Delegates to download_from_url_to_file_with_urllib() and returns its
        result dict (or False on failure).
        NOTE(review): the delegate is called positionally as in the original
        (buffersize before outfile, ranges not forwarded) — verify against
        download_from_url_to_file_with_urllib()'s signature.
        """
        # BUG FIX: the delegated result was assigned but never returned.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
        return returnval;
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httpx and return the standard result dict
    ('Type': "Content", body, headers, status, …) or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUG FIX: was httpuseragent.update({...}) — a str has no
            # .update(); the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUG FIX: same misdirected .update() as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline userinfo in the URL becomes an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
        if(httpmethod=="GET"):
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    try:
        httpcodereason = geturls_text.reason_phrase;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code);
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # BUG FIX: httpx's Response.read() returns the entire (cached) body
        # on every call, so the original read()-until-empty loop could never
        # terminate; read the body exactly once instead.
        databytes = geturls_text.read();
        if databytes:
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    geturls_text.close();
    # Transparently undo any Content-Encoding the server applied; on a bad
    # stream keep the raw bytes (best-effort, matching the original intent).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.ZstdError:
            # BUG FIX: zstandard raises ZstdError; "zstandard.error" does not exist.
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUG FIX: was "except zstandard.error" (copy-paste from the zstd branch).
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUG FIX: was "except zstandard.error"; bz2.decompress raises
            # OSError/ValueError on bad data.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"};
    geturls_text.close();
    return returnval;
if(not havehttpx):
    # BUG FIX: this fallback definition was unguarded, so it unconditionally
    # replaced the real httpx implementation defined just above; every other
    # fallback in this file is wrapped in "if(not have<lib>):".
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the httpx module is unavailable.

        Delegates to download_from_url_with_urllib() with identical arguments
        and returns its result dict (or False on failure).
        """
        # BUG FIX: the delegated result was assigned but never returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
if(havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpx and spool the body into a named temp file.

        Returns a dict with Type "File", the temp filename, sizes, headers and
        timing information, or False when the underlying download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Mirror the server's Last-Modified time onto the temp file
            # (best effort; a missing or unparsable header is ignored).
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            try:
                modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
                os.utime(tmpfilename, (modtime, modtime));
            except (AttributeError, TypeError):
                try:
                    modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                    os.utime(tmpfilename, (modtime, modtime));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Report the on-disk size, which may differ from 'Contentsize'.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib implementation."""
        # Bugfix: guard prevents shadowing the real implementation; the
        # delegated result is now returned to the caller.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
if(havehttpx):
    def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via httpx into outpath/outfile, or return the bytes
        in memory when outfile is "-".

        buffersize[0] is the download buffer, buffersize[1] the copy buffer.
        Returns a result dict (Type "File" or "Content") or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            # Refuse impossible destinations: outpath must be a directory and
            # filepath must not already be a directory.
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Best-effort: stamp the destination with the server's Last-Modified.
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            try:
                modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
                os.utime(filepath, (modtime, modtime));
            except (AttributeError, TypeError):
                try:
                    modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                    os.utime(filepath, (modtime, modtime));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # Bugfix: the original dict listed 'Method' twice; the effective
            # (last-wins) value httpmethod is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # Bugfix: duplicate 'Method' key removed (effective value kept).
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpx):
    def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib implementation."""
        # NOTE(review): 'ranges' is accepted but not forwarded, and the argument
        # order (buffersize before outfile/outpath) mirrors the original call —
        # confirm against download_from_url_to_file_with_urllib's signature.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
        return returnval;
if(havehttpx):
    def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with an httpx Client (HTTP/1.1 + HTTP/2) and return a
        result dict (Type "Content") with the decoded body, headers, status and
        sizes, or False on connection failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # Bugfix: originally updated httpuseragent (a string) instead
                # of the httpheaders dict.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # Bugfix: same as above — the header must go into httpheaders.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                # Unknown methods fall back to GET.
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        try:
            httpcodereason = geturls_text.reason_phrase;
        except AttributeError:
            httpcodereason = http_status_to_reason(geturls_text.status_code);
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2: flatten header objects into a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Best-effort transparent decompression keyed on Content-Encoding.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # Bugfix: originally caught zstandard.error, which lzma never raises.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # Bugfix: originally caught zstandard.error, which bz2 never raises.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"};
        geturls_text.close();
        return returnval;
if(not havehttpx):
    def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib implementation."""
        # Bugfix: guard prevents shadowing the real implementation; result returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
if(havehttpx):
    def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via the httpx2 backend and spool the body into a
        named temp file.

        Returns a dict with Type "File" describing the temp file, or False
        when the underlying download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Best effort: stamp the temp file with the server's Last-Modified.
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            try:
                modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
                os.utime(tmpfilename, (modtime, modtime));
            except (AttributeError, TypeError):
                try:
                    modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                    os.utime(tmpfilename, (modtime, modtime));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib implementation."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
if(havehttpx):
    def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via the httpx2 backend into outpath/outfile, or
        return the bytes in memory when outfile is "-".

        buffersize[0] is the download buffer, buffersize[1] the copy buffer.
        Returns a result dict (Type "File" or "Content") or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Best effort: stamp destination with the server's Last-Modified.
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            try:
                modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
                os.utime(filepath, (modtime, modtime));
            except (AttributeError, TypeError):
                try:
                    modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                    os.utime(filepath, (modtime, modtime));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # Bugfix: the original dict listed 'Method' twice; the effective
            # (last-wins) value httpmethod is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # Bugfix: duplicate 'Method' key removed (effective value kept).
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havehttpx):
    def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib implementation."""
        # NOTE(review): 'ranges' is not forwarded; argument order mirrors the
        # original call — confirm against download_from_url_to_file_with_urllib.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
        return returnval;
if(havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with an httpcore ConnectionPool (HTTP/1.1 only) and
        return a result dict (Type "Content"), or False on connection failure.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # Bugfix: originally updated httpuseragent (a string) instead
                # of the httpheaders dict.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # Bugfix: same as above — the header must go into httpheaders.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a Basic auth header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                # Bugfix: the POST branch originally issued a "GET" request.
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
            else:
                # Unknown methods fall back to GET.
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        # httpcore exposes no negotiated-version attribute here; HTTP/2 is
        # disabled above, so report 1.1.
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2: flatten header objects into a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Best-effort transparent decompression keyed on Content-Encoding.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # Bugfix: originally caught zstandard.error, which lzma never raises.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # Bugfix: originally caught zstandard.error, which bz2 never raises.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"};
        geturls_text.close();
        return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        # Bugfix: the delegated result is now returned to the caller.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
if(havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpcore and spool the body into a named temp file.

        Returns a dict with Type "File" describing the temp file, or False
        when the underlying download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Best effort: stamp the temp file with the server's Last-Modified.
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            try:
                modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
                os.utime(tmpfilename, (modtime, modtime));
            except (AttributeError, TypeError):
                try:
                    modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                    os.utime(tmpfilename, (modtime, modtime));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib implementation."""
        # Bugfix: the delegated result is now returned to the caller.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl via the httpcore path either to the file outpath/outfile,
    or, when outfile is "-", into memory.  Returns a result dict on success or
    False on failure.  Interface matches the sibling download_from_url_to_file_with_*
    functions in this file.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Download to a temporary file, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, which is negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict previously listed 'Method' twice; only the final value
        # (httpmethod) survived, so keep just that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Download to a temporary file and return its contents in memory.
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        prevdownsize = int(0)
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed (final value httpmethod kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the urllib implementation.

        BUGFIX: the trailing arguments are now passed by keyword; the old positional
        call slotted buffersize into the target's outfile parameter (and never
        forwarded ranges at all).
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Fetch httpurl with httpcore (HTTP/1.1 + HTTP/2 connection pool) and return a
    dict describing the response (decoded content, headers, status code, ...) or
    False on a connection error.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...), which mutated the wrong object.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: was httpuseragent.update(...); the Referer belongs in httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Translate inline URL credentials into an HTTP basic-auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    time.sleep(sleep)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUGFIX: the POST branch previously issued "GET"; httpcore also takes
            # the request body via content=, not data=.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects cannot always be used as plain dicts; flatten.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode the body according to Content-Encoding; each decoder
    # is best-effort and leaves the raw bytes in place on failure.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        # BUGFIX: the zstandard module exposes ZstdError, not "error".
        except zstandard.ZstdError:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        # BUGFIX: lzma failures raise lzma.LZMAError (was except zstandard.error).
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        # BUGFIX: bz2 raises OSError/IOError on bad data (was except zstandard.error).
        except (IOError, OSError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"}
    geturls_text.close()
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the plain urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl via download_from_url_with_httpcore2 and spool the content
    into a uniquely named temporary file.  Returns the result dict (Type "File")
    or False on failure.

    CONSISTENCY FIX: httpheaders/httpcookie now carry the same defaults
    (geturls_headers / geturls_cj) as every sibling download_from_url_file_with_*
    function; the old signature had no defaults for these parameters.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the temp file with the server's Last-Modified time when present.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    tmpfilesize = os.path.getsize(tmpfilename)  # hoisted: was computed three times
    returnval.update({'Filesize': tmpfilesize, 'FilesizeAlt': {'IEC': get_readable_size(tmpfilesize, 2, "IEC"), 'SI': get_readable_size(tmpfilesize, 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the plain urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl via the httpcore2 path either to the file outpath/outfile,
    or, when outfile is "-", into memory.  Returns a result dict on success or
    False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Download to a temporary file, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, which is negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: duplicate 'Method' key removed; the final value (httpmethod) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Download to a temporary file and return its contents in memory.
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = int(0)
        prevdownsize = int(0)
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed (final value httpmethod kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback delegating to the urllib implementation.

    BUGFIX: trailing arguments are passed by keyword; the old positional call
    slotted buffersize into the target's outfile parameter and dropped ranges.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias for the urllib3 implementation; kept for interface parity with the other *_with_request* helpers."""
    return download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the plain urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias for the urllib3 file-download implementation; kept for interface parity."""
    return download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the plain urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias for the urllib3 to-file implementation.

    BUGFIX: the target signature is (..., postdata, outfile, outpath, ranges,
    buffersize, sleep, timeout); the old positional call passed buffersize where
    outfile belongs and never forwarded ranges.  Keyword arguments fix the
    misalignment.
    """
    return download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the urllib implementation.

        BUGFIX: trailing arguments are passed by keyword; the old positional call
        slotted buffersize into the target's outfile parameter and dropped ranges.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Fetch httpurl with urllib3 (PoolManager, streamed response) and return a
    dict describing the response (decoded content, headers, status code, ...) or
    False on a connection error.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...), which mutated the wrong object.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: was httpuseragent.update(...); the Referer belongs in httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Translate inline URL credentials into an HTTP basic-auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    pool_timeout = urllib3.util.Timeout(connect=timeout, read=timeout)
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=pool_timeout)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    time.sleep(sleep)
    try:
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    # BUGFIX: urllib3 has no "ConnectError"; NewConnectionError is the class that
    # is actually raised for failed connections.
    except urllib3.exceptions.NewConnectionError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUGFIX: urllib3 reports the HTTP version as an int (10 / 11); the original
    # compared against the string "10", which never matched.
    if(geturls_text.version == 10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects cannot always be used as plain dicts; flatten.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the body according to Content-Encoding; each decoder
    # is best-effort and leaves the raw bytes in place on failure.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        # BUGFIX: the zstandard module exposes ZstdError, not "error".
        except zstandard.ZstdError:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        # BUGFIX: lzma failures raise lzma.LZMAError (was except zstandard.error).
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        # BUGFIX: bz2 raises OSError/IOError on bad data (was except zstandard.error).
        except (IOError, OSError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the plain urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl via download_from_url_with_urllib3 and spool the content
    into a uniquely named temporary file.  Returns the result dict (Type "File")
    or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the temp file with the server's Last-Modified time when present.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    tmpfilesize = os.path.getsize(tmpfilename)  # hoisted: was computed three times
    returnval.update({'Filesize': tmpfilesize, 'FilesizeAlt': {'IEC': get_readable_size(tmpfilesize, 2, "IEC"), 'SI': get_readable_size(tmpfilesize, 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the plain urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl (urllib3 backend) to outpath/outfile, or into memory
    when outfile is "-".  Returns a result dict, or False on failure.

    buffersize is [download_chunk, copy_chunk]; defaults kept verbatim
    (including the mutable lists and def-time os.getcwd()) for compatibility.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # File mode: fetch to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Py2 email.utils lacks parsedate_to_datetime; parse manually.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict literal had a duplicate 'Method' key; the later
        # httpmethod value won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Memory mode: fetch to a temp file, then copy it into a buffer.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUGFIX: duplicate 'Method' key removed here too (httpmethod wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        NOTE(review): `ranges` is accepted but not forwarded, and the
        argument order (buffersize before outfile/outpath) mirrors the other
        fallbacks — confirm against the urllib delegate's signature.
        """
        returnval = download_from_url_to_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUGFIX: forward the delegate's result (no return was visible here).
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with mechanize and return a result dict describing the
    response (decoded content, headers, status, etc.), or False on error."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update({...}) — a str has no .update();
            # the header belongs on httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same mistake as above for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # Keep the error response object; it still exposes code/headers/body.
        geturls_text = geturls_text_error
        log.info("Error With URL " + httpurl)
    except URLError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.code
    httpcodereason = geturls_text.msg
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    reqhead = geturls_opener.request
    httpheadersentout = reqhead.header_items()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Py2 message objects are not plain dicts; rebuild a real dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the body according to Content-Encoding;
    # decode failures fall through with the raw bytes.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was `except zstandard.error` — wrong module for lzma.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was `except zstandard.error` — bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to urllib
        with an identical signature."""
        returnval = download_from_url_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, sleep, timeout)
        # BUGFIX: forward the delegate's result (no return was visible here).
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with mechanize and spool the content to a uniquely
    named temp file.  Returns a result dict (Filename, Filesize, headers,
    timing), or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Preserve the server's Last-Modified timestamp on the temp file.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, yielding a
    # negative duration and a negative stored 'DownloadTime').
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to urllib
        with an identical signature."""
        returnval = download_from_url_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: forward the delegate's result (no return was visible here).
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl (mechanize backend) to outpath/outfile, or into
    memory when outfile is "-".  Returns a result dict, or False on failure.

    buffersize is [download_chunk, copy_chunk]; defaults kept verbatim
    (including the mutable lists and def-time os.getcwd()) for compatibility.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # File mode: fetch to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: duplicate 'Method' key removed (later httpmethod value won).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Memory mode: fetch to a temp file, then copy it into a buffer.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUGFIX: 'HeadersSent' was the literal list ['HeadersSent'] instead
        # of the headers actually sent; duplicate 'Method' key also removed.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to urllib.

        NOTE(review): `ranges` is accepted but not forwarded, matching the
        other to-file fallbacks — confirm against the delegate's signature.
        """
        returnval = download_from_url_to_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUGFIX: forward the delegate's result (no return was visible here).
        return returnval
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with pycurl and return a result dict describing the
    response (decoded content, headers, status, etc.), or False on error."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update({...}) — a str has no .update();
            # the header belongs on httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same mistake as above for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
    geturls_opener.addheaders = httpheaders
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    retrieved_body = BytesIO()
    retrieved_headers = BytesIO()

    def _make_curl():
        # Common Curl setup shared by GET/POST/other (was triplicated).
        c = pycurl.Curl()
        c.setopt(c.URL, httpurl)
        c.setopt(c.WRITEFUNCTION, retrieved_body.write)
        c.setopt(c.HTTPHEADER, httpheaders)
        c.setopt(c.HEADERFUNCTION, retrieved_headers.write)
        c.setopt(c.FOLLOWLOCATION, True)
        c.setopt(c.TIMEOUT, timeout)
        return c

    try:
        geturls_text = _make_curl()
        if(httpmethod == "POST"):
            geturls_text.setopt(geturls_text.POST, True)
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
        geturls_text.perform()
        retrieved_headers.seek(0)
        if(sys.version[0] == "2"):
            pycurlhead = retrieved_headers.read()
        if(sys.version[0] >= "3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8')
        # Parse "HTTP/x.y CODE [REASON]" from the first status line.
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
        retrieved_body.seek(0)
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL " + httpurl)
        return False
    except ValueError:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
    httpversionout = pyhttpverinfo[0]
    httpmethodout = httpmethod
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
    httpheaderout = pycurlheadersout
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
    if(sys.version[0] == "2"):
        # Py2 message objects are not plain dicts; rebuild a real dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the body according to Content-Encoding;
    # decode failures fall through with the raw bytes.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was `except zstandard.error` — wrong module for lzma.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was `except zstandard.error` — bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"}
    geturls_text.close()
    return returnval
if(not havepycurl):
    # NOTE(review): the guarding `if(not havepycurl):` line was not visible
    # in this view and is reconstructed to match the sibling fallbacks —
    # confirm against the full file.
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to urllib
        with an identical signature."""
        returnval = download_from_url_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, sleep, timeout)
        # BUGFIX: forward the delegate's result (no return was visible here).
        return returnval
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with pycurl and spool the content to a uniquely named
    temp file.  Returns a result dict (Filename, Filesize, headers, timing),
    or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Preserve the server's Last-Modified timestamp on the temp file.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, yielding a
    # negative duration and a negative stored 'DownloadTime').
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havepycurl):
    # NOTE(review): the guarding `if(not havepycurl):` line was not visible
    # in this view and is reconstructed to match the sibling fallbacks —
    # confirm against the full file.
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to urllib
        with an identical signature."""
        returnval = download_from_url_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: forward the delegate's result (no return was visible here).
        return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the pycurl file backend and either move the
    temporary file to outpath/outfile (outfile != "-") or read it back into
    memory (outfile == "-", second half of this function).

    NOTE(review): several original statements are missing from this copy of
    the file (branch headers, failure returns, counter initialisation, the
    final return).  The gaps are marked below; verify the reconstructed
    control flow against the complete file before relying on it.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # (gap in source: the guard that applies the module default only when
    #  sleep<0 is missing from this copy)
    sleep = geturls_download_sleep;
    # (gap in source)
    if(not outfile=="-"):
        # File mode: resolve the destination path below outpath.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # (gap in source: failure-return statement missing from this copy)
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # (gap in source: failure-return statement missing from this copy)
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # (gap in source: failure-return statement missing from this copy)
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        # (gap in source)
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server-side Last-Modified timestamp on the moved file.
        # (gap in source: the try: opening this handler is missing from this copy)
        os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # email.utils.parsedate_to_datetime is Python 3.3+; fall back to
            # strptime on the RFC-1123 date string.
            os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        # (gap in source)
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): 'Method' appears twice in this dict literal; the
        # second entry (httpmethod) silently overwrites the first.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # (gap in source: the branch header for the in-memory outfile=="-" case is
    #  missing from this copy; the statements below belong to that branch)
    pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
    tmpfilename = pretmpfilename.get('Filename');
    downloadsize = int(os.path.getsize(tmpfilename));
    # (gap in source: initialisation of the progress counters used below —
    #  presumably fulldatasize/prevdownsize — is missing from this copy)
    exec_time_start = time.time();
    with open(tmpfilename, 'rb') as ft:
        # (gap in source: the in-memory buffer `f` used below and the
        #  read-loop header are missing from this copy; the statements below
        #  copy the temp file in buffersize[1]-byte chunks)
        databytes = ft.read(buffersize[1]);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        # (gap in source)
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        # (gap in source: the write into `f` is missing from this copy)
        fdata = f.getvalue();
    # (gap in source)
    os.remove(tmpfilename);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
    # NOTE(review): duplicate 'Method' key here as well (second entry wins).
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # (gap in source: the final return of returnval is missing from this copy)
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub: delegate to the urllib-based to-file downloader.

    Fixes two defects in the original wrapper:
    * the arguments after postdata were passed positionally in the wrong
      order (buffersize landed in the outfile slot, and ranges was dropped
      entirely) — they are now forwarded by keyword, matching the uniform
      download_from_url_to_file_with_* signature used throughout this module;
    * the result was assigned but never returned.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/2 and return the standard
        result dict ('Content', 'Headers', 'Code', ..., 'HTTPLib': "pycurl2").

        NOTE(review): several original lines are missing from this copy
        (try:/else: headers, loop headers, failure returns, the final
        return); gaps are marked below and must be checked against the full
        file before the reconstructed flow is trusted.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        # (gap in source: sleep<0 guard missing from this copy)
        sleep = geturls_download_sleep;
        # (gap in source)
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            # (gap in source: else-branch header missing from this copy)
            # NOTE(review): this updates httpuseragent (a string) rather than
            # httpheaders — almost certainly a copy-paste bug; confirm.
            httpuseragent.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            # (gap in source: else-branch header missing from this copy)
            # NOTE(review): same suspected bug — should likely be
            # httpheaders.update({'Referer': httpreferer}).
            httpuseragent.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            # pycurl wants headers as a list of "Name: value" strings.
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        # (gap in source)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        # (gap in source: the try: wrapping the transfer is missing from this copy)
        if(httpmethod=="GET"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        elif(httpmethod=="POST"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.setopt(geturls_text.POST, True);
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
        # (gap in source: else-branch header missing from this copy; the block
        #  below is the default-method transfer, identical to GET)
        geturls_text = pycurl.Curl();
        geturls_text.setopt(geturls_text.URL, httpurl);
        geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
        geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
        geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
        geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
        geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
        geturls_text.setopt(geturls_text.TIMEOUT, timeout);
        geturls_text.perform();
        retrieved_headers.seek(0);
        if(sys.version[0]=="2"):
            pycurlhead = retrieved_headers.read();
        if(sys.version[0]>="3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8');
        # Parse "HTTP/x.y CODE [REASON]" from the first status line.
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0];
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
        retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            # (gap in source: failure-return missing from this copy)
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            # (gap in source: failure-return and a further except clause are
            #  missing from this copy; the log call below belongs to that
            #  missing handler)
        log.info("Error With URL "+httpurl);
        # (gap in source)
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the header dict key-by-key.
            # (gap in source: try:/loop scaffolding around the update below is
            #  missing from this copy)
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
            except AttributeError:
                # (gap in source: handler body missing from this copy)
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        # (gap in source: progress-counter initialisation missing from this copy)
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            # (gap in source: read-loop header missing from this copy; the
            #  statements below stream the body in buffersize-byte chunks)
            databytes = retrieved_body.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            # (gap in source)
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # (gap in source: rewind of strbuf missing from this copy)
        returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding the server applied.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            # (gap in source: try: header missing from this copy)
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            returnval_content = zlib.decompress(returnval_content);
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            returnval_content = lzma.decompress(returnval_content);
            # NOTE(review): catches zstandard.error for an lzma operation —
            # lzma.decompress raises lzma.LZMAError; looks like copy-paste.
            except zstandard.error:
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            returnval_content = bz2.decompress(returnval_content);
            # NOTE(review): same copy-paste suspect — bz2 failures do not
            # raise zstandard.error.
            except zstandard.error:
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"};
        geturls_text.close();
        # (gap in source: the final return of returnval is missing from this copy)
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub: delegate to the urllib-based downloader.

    Registered in place of the HTTP/2 pycurl implementation when that
    backend is unavailable; same signature as the real implementation so
    callers need not care which one is bound.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # Fix: the original never returned the delegated result (always None).
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is present but lacks HTTP/2 support: use the
        plain pycurl (HTTP/1.x) downloader instead.

        Fixes: the original delegated to the urllib backend, inconsistent
        with the parallel pycurl3 fallback chain (which steps down
        pycurl3 -> pycurl2 -> pycurl), and never returned the result.
        """
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl over HTTP/2 via download_from_url_with_pycurl2()
        and spool the returned body into a uniquely named temporary file;
        returns the standard 'Type': "File" result dict.

        NOTE(review): some original lines are missing from this copy (gaps
        marked below); verify against the full file.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Derive a unique temp-file suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            # Python 3 hashlib requires bytes.
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        # (gap in source: sleep<0 guard missing from this copy)
        sleep = geturls_download_sleep;
        # (gap in source)
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            # (gap in source: failure-return statement missing from this copy)
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Propagate the server's Last-Modified stamp onto the temp file.
            # (gap in source: try: header missing from this copy)
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # email.utils.parsedate_to_datetime is Python 3.3+; fall back
                # to strptime on the RFC-1123 date string.
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            # (gap in source)
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        # (gap in source)
        exec_time_end = time.time();
        # NOTE(review): the elapsed value is start-end (negative); presumably
        # hms_string/consumers normalise this — consistent module-wide.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        # (gap in source: the final return of returnval is missing from this copy)
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub: delegate to the urllib-based file downloader.

    Bound in place of the HTTP/2 pycurl file downloader when that backend is
    unavailable; identical signature so callers are backend-agnostic.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # Fix: the original never returned the delegated result (always None).
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is present but lacks HTTP/2: step down to the
        plain pycurl (HTTP/1.x) file downloader with the same arguments.
        """
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Fix: the original never returned the delegated result (always None).
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/2 variant of download_from_url_to_file_with_pycurl(): fetch
        via download_from_url_file_with_pycurl2(), then either move the temp
        file to outpath/outfile (outfile != "-") or read it back into memory
        (outfile == "-").

        NOTE(review): several original statements are missing from this copy
        (gaps marked below); verify the reconstructed control flow against
        the complete file.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        # (gap in source: sleep<0 guard missing from this copy)
        sleep = geturls_download_sleep;
        # (gap in source)
        if(not outfile=="-"):
            # File mode: resolve the destination path below outpath.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                # (gap in source: failure-return statement missing from this copy)
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                # (gap in source: failure-return statement missing from this copy)
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                # (gap in source: failure-return statement missing from this copy)
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            # (gap in source)
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server-side Last-Modified timestamp on the moved file.
            # (gap in source: try: header missing from this copy)
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # Python 2 fallback: parse the RFC-1123 date with strptime.
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            # (gap in source)
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # NOTE(review): 'Method' appears twice in this dict literal; the
            # second entry (httpmethod) silently overwrites the first.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        # (gap in source: the branch header for the in-memory outfile=="-" case
        #  is missing from this copy; the statements below belong to that branch)
        pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        # (gap in source: initialisation of the progress counters used below
        #  is missing from this copy)
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            # (gap in source: the in-memory buffer `f` used below and the
            #  read-loop header are missing from this copy; the statements
            #  below copy the temp file in buffersize[1]-byte chunks)
            databytes = ft.read(buffersize[1]);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            # (gap in source)
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            # (gap in source: the write into `f` is missing from this copy)
            fdata = f.getvalue();
        # (gap in source)
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # NOTE(review): duplicate 'Method' key here as well (second entry wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        # (gap in source: the final return of returnval is missing from this copy)
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub: delegate to the urllib-based to-file downloader.

    Fixes two defects in the original wrapper:
    * the arguments after postdata were forwarded positionally in the wrong
      order (buffersize landed in the outfile slot, and ranges was dropped)
      — now forwarded by keyword, matching the uniform
      download_from_url_to_file_with_* signature used throughout this module;
    * the result was assigned but never returned.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl lacks HTTP/2: step down to the plain pycurl
        to-file downloader.

        Fixes two defects in the original wrapper: the positional call
        `(..., postdata, buffersize, outfile, outpath, sleep, timeout)` did
        not match download_from_url_to_file_with_pycurl's signature
        `(..., postdata, outfile, outpath, ranges, buffersize, sleep,
        timeout)` — buffersize landed in the outfile slot and outpath in the
        ranges slot — and the result was never returned.
        """
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/3 and return the standard
        result dict ('Content', 'Headers', 'Code', ..., 'HTTPLib': "pycurl3").
        Structurally identical to download_from_url_with_pycurl2() except for
        the CURL_HTTP_VERSION_3_0 option and the HTTPLib tag.

        NOTE(review): several original lines are missing from this copy
        (try:/else: headers, loop headers, failure returns, the final
        return); gaps are marked below.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        # (gap in source: sleep<0 guard missing from this copy)
        sleep = geturls_download_sleep;
        # (gap in source)
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            # (gap in source: else-branch header missing from this copy)
            # NOTE(review): updates httpuseragent (a string) rather than
            # httpheaders — almost certainly a copy-paste bug; confirm.
            httpuseragent.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            # (gap in source: else-branch header missing from this copy)
            # NOTE(review): same suspected bug — should likely be
            # httpheaders.update({'Referer': httpreferer}).
            httpuseragent.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            # pycurl wants headers as a list of "Name: value" strings.
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        # (gap in source)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        # (gap in source: the try: wrapping the transfer is missing from this copy)
        if(httpmethod=="GET"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        elif(httpmethod=="POST"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.setopt(geturls_text.POST, True);
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
        # (gap in source: else-branch header missing from this copy; the block
        #  below is the default-method transfer, identical to GET)
        geturls_text = pycurl.Curl();
        geturls_text.setopt(geturls_text.URL, httpurl);
        geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
        geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
        geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
        geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
        geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
        geturls_text.setopt(geturls_text.TIMEOUT, timeout);
        geturls_text.perform();
        retrieved_headers.seek(0);
        if(sys.version[0]=="2"):
            pycurlhead = retrieved_headers.read();
        if(sys.version[0]>="3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8');
        # Parse "HTTP/x.y CODE [REASON]" from the first status line.
        # NOTE(review): .rstrip('\r\n') after .strip() is redundant — strip()
        # already removes trailing CR/LF.
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
        retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            # (gap in source: failure-return missing from this copy)
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            # (gap in source: failure-return and a further except clause are
            #  missing from this copy; the log call below belongs to that
            #  missing handler)
        log.info("Error With URL "+httpurl);
        # (gap in source)
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the header dict key-by-key.
            # (gap in source: try:/loop scaffolding around the update below is
            #  missing from this copy)
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
            except AttributeError:
                # (gap in source: handler body missing from this copy)
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        # (gap in source: progress-counter initialisation missing from this copy)
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            # (gap in source: read-loop header missing from this copy; the
            #  statements below stream the body in buffersize-byte chunks)
            databytes = retrieved_body.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            # (gap in source)
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # (gap in source: rewind of strbuf missing from this copy)
        returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding the server applied.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            # (gap in source: try: header missing from this copy)
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            returnval_content = zlib.decompress(returnval_content);
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            returnval_content = lzma.decompress(returnval_content);
            # NOTE(review): catches zstandard.error for an lzma operation —
            # lzma.decompress raises lzma.LZMAError; looks like copy-paste.
            except zstandard.error:
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            returnval_content = bz2.decompress(returnval_content);
            # NOTE(review): same copy-paste suspect — bz2 failures do not
            # raise zstandard.error.
            except zstandard.error:
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"};
        geturls_text.close();
        # (gap in source: the final return of returnval is missing from this copy)
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub: delegate to the urllib-based downloader.

    Registered in place of the HTTP/3 pycurl implementation when no pycurl
    backend is available; same signature so callers are backend-agnostic.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # Fix: the original never returned the delegated result (always None).
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl lacks HTTP/3 but supports HTTP/2: step down
        to the HTTP/2 downloader with the same arguments.
        """
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Fix: the original never returned the delegated result (always None).
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl supports neither HTTP/3 nor HTTP/2: step down
        to the plain pycurl (HTTP/1.x) downloader with the same arguments.
        """
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Fix: the original never returned the delegated result (always None).
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl (pycurl, HTTP/3) into a uniquely named temporary file.

        Returns a result dict with 'Type': "File" describing the temp file, or
        False when the underlying download fails. The caller is responsible for
        removing the temp file (delete=False).
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Derive a unique temp-file suffix from the URL, buffer size and start time.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep<0):
            sleep = geturls_download_sleep
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            try:
                # Stamp the temp file with the server's Last-Modified time, when present.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except (AttributeError, TypeError, ValueError):
                try:
                    # Older parsers: fall back to an explicit RFC-1123 strptime.
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (AttributeError, TypeError, ValueError):
                    pass  # no usable Last-Modified header; keep the file's own mtime
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        # Fix: elapsed time is end - start (the original logged a negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate to the urllib backend.

    Returns the delegate's result dict ('Type': "File"), or False on failure.
    """
    # Fix: propagate the delegate's result to the caller instead of dropping it.
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                   httpreferer, httpcookie, httpmethod,
                                                   postdata, ranges, buffersize, sleep, timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl lacks HTTP/3 but has HTTP/2: route the file variant to the pycurl2 backend.
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/2 fallback for the HTTP/3 file entry point; same interface and result."""
        # Fix: propagate the delegate's result to the caller instead of dropping it.
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent,
                                                        httpreferer, httpcookie, httpmethod,
                                                        postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl has neither HTTP/3 nor HTTP/2: route the file variant to the plain pycurl backend.
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/1.x fallback for the HTTP/3 file entry point; same interface and result."""
        # Fix: propagate the delegate's result to the caller instead of dropping it.
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent,
                                                       httpreferer, httpcookie, httpmethod,
                                                       postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl (pycurl, HTTP/3) to outpath/outfile, or to memory when outfile=="-".

        Returns a result dict ('Type': "File" or "Content"), or False on failure.
        buffersize is a pair: [download chunk size, copy-back chunk size].
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep<0):
            sleep = geturls_download_sleep
        if(not outfile=="-"):
            # --- write the download to a real file path ---
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False  # outpath must be a directory
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False  # target name is already a directory
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            try:
                # Carry the server's Last-Modified stamp over to the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except (AttributeError, TypeError, ValueError):
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (AttributeError, TypeError, ValueError):
                    pass  # no usable Last-Modified header
            exec_time_end = time.time()
            # Fix: elapsed time is end - start (was logged as a negative value).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # Fix: the original dict literal listed 'Method' twice; the value that
            # survived (httpmethod) is kept, the shadowed duplicate is removed.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile=="-"):
            # --- copy the downloaded temp file back into memory ---
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):  # guard against zero-size files
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0, 0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate to the urllib backend.

    Returns the delegate's result dict, or False on failure.
    """
    # Fix: positional arguments were misordered (buffersize was passed where
    # outfile belongs and ranges was dropped); pass them in signature order
    # and return the result.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                      httpreferer, httpcookie, httpmethod,
                                                      postdata, outfile, outpath, ranges,
                                                      buffersize, sleep, timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # Fix: the original defined download_from_url_to_file_with_pycurl2 calling itself
    # (infinite recursion). Per the sibling fallbacks, this branch must redefine the
    # *_pycurl3 entry point to delegate to the pycurl2 backend.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/2 fallback for the HTTP/3 to-file entry point; same interface and result."""
        # Fix: arguments were also misordered (buffersize before outfile, ranges dropped).
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent,
                                                           httpreferer, httpcookie, httpmethod,
                                                           postdata, outfile, outpath, ranges,
                                                           buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    # Fix: the original defined download_from_url_to_file_with_pycurl calling itself
    # (infinite recursion). Per the sibling fallbacks, this branch must redefine the
    # *_pycurl3 entry point to delegate to the plain pycurl backend.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/1.x fallback for the HTTP/3 to-file entry point; same interface and result."""
        # Fix: arguments were also misordered (buffersize before outfile, ranges dropped).
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent,
                                                          httpreferer, httpcookie, httpmethod,
                                                          postdata, outfile, outpath, ranges,
                                                          buffersize, sleep, timeout)
        return returnval
def download_file_from_ftp_file(url):
    """Fetch a file over ftp/ftps and return it as a rewound BytesIO, or False.

    Credentials default to anonymous when absent from the URL. http/https URLs
    are rejected.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # Fix: the log line referenced undefined 'httpurl' (parameter is 'url'),
        # which raised NameError instead of reporting the failure.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()  # switch the data channel to TLS
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)  # rewind so callers can read from the start
    return ftpfile
def download_file_from_ftp_string(url):
    """Return the contents of the remote FTP file at *url* as a byte string."""
    return download_file_from_ftp_file(url).read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl over FTP and return a result dict ('Type': "Content"), or False.

    The HTTP-style header/cookie/method parameters are accepted for interface
    parity with the HTTP backends; only the headers are normalized here.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fix: was httpuseragent.update(...), which mutated the wrong object
            # (and raised AttributeError for string user agents).
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # Fix: same wrong-object bug for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    downloadsize = None  # FTP backend does not report a size up front
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):  # avoid division by zero when size is unknown
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0, 0)  # rewind before reading back, else read() returns b""
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over FTP into a uniquely named temporary file.

    Returns a result dict with 'Type': "File", or False when the download fails.
    The caller is responsible for removing the temp file (delete=False).
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Stamp the temp file with the server's Last-Modified time, when present.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except (AttributeError, TypeError, ValueError):
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (AttributeError, TypeError, ValueError):
                pass  # FTP results carry 'Headers': None, so this path is the norm
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # Fix: elapsed time is end - start (the original logged a negative duration).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over FTP to outpath/outfile, or to memory when outfile=="-".

    Returns a result dict ('Type': "File" or "Content"), or False on failure.
    buffersize is a pair: [download chunk size, copy-back chunk size].
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # --- write the download to a real file path ---
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath must be a directory
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # target name is already a directory
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # Fix: elapsed time is end - start (was logged as a negative value).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # Fix: the original dict literal listed 'Method' twice; the surviving
        # value (None) is kept and the shadowed duplicate removed.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        # --- copy the downloaded temp file back into memory ---
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):  # guard against zero-size files
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0, 0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file object *ftpfile* to the ftp/ftps location in *url*.

    Returns the (rewound) file object on success, False on failure.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # Fix: the log line referenced undefined 'httpurl' (parameter is 'url'),
        # which raised NameError instead of reporting the failure.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()  # switch the data channel to TLS
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)  # rewind so the caller can reuse the buffer
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string *ftpstring* to the ftp/ftps location in *url*.

    Returns the uploaded file object (or False on failure), mirroring
    upload_file_to_ftp_file.
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch a file over SFTP (paramiko) and return it as a rewound BytesIO, or False.

    Only the "sftp" scheme is accepted; credentials default to anonymous when
    absent from the URL.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # Fix: the log line referenced undefined 'httpurl' (parameter is 'url'),
        # which raised NameError instead of reporting the failure.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)  # rewind so callers can read from the start
    return sftpfile
5035 def download_file_from_sftp_file(url
):
def download_file_from_sftp_string(url):
    """Return the contents of the remote SFTP file at *url* as a byte string."""
    return download_file_from_sftp_file(url).read()
5043 def download_file_from_ftp_string(url
):
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl over SFTP and return a result dict ('Type': "Content"), or False.

    The HTTP-style header/cookie/method parameters are accepted for interface
    parity with the HTTP backends; only the headers are normalized here.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fix: was httpuseragent.update(...), which mutated the wrong object
            # (and raised AttributeError for string user agents).
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # Fix: same wrong-object bug for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    downloadsize = None  # SFTP backend does not report a size up front
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):  # avoid division by zero when size is unknown
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0, 0)  # rewind before reading back, else read() returns b""
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    # paramiko is unavailable: SFTP downloads are unsupported on this install.
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stub kept for interface parity; always reports failure."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP into a uniquely named temporary file.

    Returns a result dict with 'Type': "File", or False when the download fails.
    The caller is responsible for removing the temp file (delete=False).
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Stamp the temp file with the server's Last-Modified time, when present.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except (AttributeError, TypeError, ValueError):
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (AttributeError, TypeError, ValueError):
                pass  # SFTP results carry 'Headers': None, so this path is the norm
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # Fix: elapsed time is end - start (the original logged a negative duration).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveparamiko):
    # paramiko is unavailable: SFTP file downloads are unsupported on this install.
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stub kept for interface parity; always reports failure."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl over SFTP (paramiko) and either move the temp file to
    outpath/outfile (outfile != "-") or return the bytes in-memory
    (outfile == "-").

    Returns a result dict ('Type', 'Filename'/'Content', sizes, timings,
    header/method metadata), or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # File mode: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath must be a directory, not an existing file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination must not be an existing directory.
            return False;
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            # shutil.move normally removes the source; clean up if it remains.
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed the 'Method' key twice; the second
        # entry ('Method': None) silently overwrote the real method value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # In-memory mode: download to a temp file, copy its bytes, delete it.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard against ZeroDivisionError on empty files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub used when paramiko is not installed; SFTP-to-file downloads always fail."""
        # NOTE(review): body reconstructed from the file's fallback-stub convention -- confirm.
        return False;
def upload_file_to_sftp_file(sftpfile, url):
    """
    Upload the file-like object sftpfile to an sftp:// url via paramiko.

    Returns sftpfile (rewound to offset 0) on success, False on any failure
    (non-sftp scheme, connection/auth error, DNS failure, timeout).
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH/SFTP port
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        # NOTE(review): connects with the raw URL credentials, not the
        # sftp_username/sftp_password fallbacks computed above -- confirm intent.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: message previously referenced undefined 'httpurl' (param is 'url').
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        # BUGFIX: message previously referenced undefined 'httpurl' (param is 'url').
        log.info("Error With URL "+url);
        return False;
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;
5256 def upload_file_to_sftp_file(sftpfile
, url
):
def upload_file_to_sftp_string(sftpstring, url):
    """
    Upload the bytes in sftpstring to an sftp:// url.

    Wraps the payload in a BytesIO and delegates to upload_file_to_sftp_file.
    Returns that function's result (the file object on success, False on failure).
    """
    sftpfileo = BytesIO(sftpstring);
    # BUGFIX: previously called undefined 'upload_file_to_sftp_files' with the
    # undefined name 'ftpfileo' -- both raised NameError at call time.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
5266 def upload_file_to_sftp_string(url
):
def download_file_from_pysftp_file(url):
    """
    Fetch an sftp:// url with pysftp and return its contents as a BytesIO
    positioned at offset 0, or False on any failure.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH/SFTP port
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the Connection object was previously discarded and an
        # undefined 'ssh' was used afterwards; pysftp.Connection IS the SFTP
        # client (no open_sftp() step exists on it).
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: message previously referenced undefined 'httpurl' (param is 'url').
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        # BUGFIX: message previously referenced undefined 'httpurl' (param is 'url').
        log.info("Error With URL "+url);
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
5312 def download_file_from_pysftp_file(url
):
def download_file_from_pysftp_string(url):
    """
    Fetch an sftp:// url with pysftp and return its raw bytes, or False on failure.
    """
    sftpfile = download_file_from_pysftp_file(url);
    # ROBUSTNESS: the helper returns False on failure; calling .read() on False
    # previously raised AttributeError instead of reporting the failure.
    if(not sftpfile):
        return False;
    return sftpfile.read();
5320 def download_file_from_ftp_string(url
):
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Fetch httpurl via pysftp and return a result dict with the payload under
    'Content' (header/method fields are None -- SFTP has no HTTP metadata),
    or False when the transfer fails.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        # Headers are normalized for API parity with the HTTP backends even
        # though SFTP does not transmit them.
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        return False;
    # Remote size is unknown over this transport; treated as 0 for logging.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                # Guard: downloadsize is 0 here; unguarded division raised
                # ZeroDivisionError on the first chunk.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # Rewind before reading back, otherwise read() at EOF yields b"".
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
5367 def download_from_url_with_pysftp(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Fetch httpurl via pysftp into a uniquely named temporary file.

    Returns a result dict whose 'Filename' points at the temp file (caller is
    responsible for deleting it), or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from url + buffersize + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # BUGFIX: the call previously passed 'httpuseragent' and 'httpreferer',
    # which are not parameters of this function (NameError) and are not
    # accepted by download_from_url_with_pysftp either.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the remote Last-Modified header (when present) onto the
            # temp file's atime/mtime.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                # Fallback parse for runtimes lacking parsedate_to_datetime.
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
5411 def download_from_url_file_with_pysftp(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl over SFTP (pysftp) and either move the temp file to
    outpath/outfile (outfile != "-") or return the bytes in-memory
    (outfile == "-").

    Returns a result dict ('Type', 'Filename'/'Content', sizes, timings,
    header/method metadata), or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # File mode: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath must be a directory, not an existing file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination must not be an existing directory.
            return False;
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            # shutil.move normally removes the source; clean up if it remains.
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed the 'Method' key twice; the second
        # entry ('Method': None) silently overwrote the real method value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # In-memory mode: download to a temp file, copy its bytes, delete it.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard against ZeroDivisionError on empty files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
5476 def download_from_url_to_file_with_pysftp(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
def upload_file_to_pysftp_file(sftpfile, url):
    """
    Upload the file-like object sftpfile to an sftp:// url via pysftp.

    Returns sftpfile (rewound to offset 0) on success, False on any failure
    (non-sftp scheme, connection/auth error, DNS failure, timeout).
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH/SFTP port
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the Connection object was previously discarded and an
        # undefined 'ssh' was used afterwards; pysftp.Connection IS the SFTP
        # client (no open_sftp() step exists on it).
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: message previously referenced undefined 'httpurl' (param is 'url').
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        # BUGFIX: message previously referenced undefined 'httpurl' (param is 'url').
        log.info("Error With URL "+url);
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
5520 def upload_file_to_pysftp_file(sftpfile
, url
):
def upload_file_to_pysftp_string(sftpstring, url):
    """
    Upload the bytes in sftpstring to an sftp:// url via pysftp.

    Wraps the payload in a BytesIO and delegates to upload_file_to_pysftp_file.
    Returns that function's result (the file object on success, False on failure).
    """
    sftpfileo = BytesIO(sftpstring);
    # BUGFIX: previously called undefined 'upload_file_to_pysftp_files' with
    # the undefined name 'ftpfileo' -- both raised NameError at call time.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
5530 def upload_file_to_pysftp_string(url
):