4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
24 from cgi
import parse_qsl
;
26 from urlparse
import parse_qsl
;
27 except (DeprecationWarning, TypeError):
28 from urlparse
import parse_qsl
;
35 havemechanize
= False;
40 havemechanize
= False;
68 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
108 if(sys
.version
[0]=="2"):
110 from io
import StringIO
, BytesIO
;
113 from cStringIO
import StringIO
;
114 from cStringIO
import StringIO
as BytesIO
;
116 from StringIO
import StringIO
;
117 from StringIO
import StringIO
as BytesIO
;
118 # From http://python-future.org/compatible_idioms.html
119 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
120 from urllib
import urlencode
;
121 from urllib
import urlopen
as urlopenalt
;
122 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
123 import urlparse
, cookielib
;
124 from httplib
import HTTPConnection
, HTTPSConnection
;
125 if(sys
.version
[0]>="3"):
126 from io
import StringIO
, BytesIO
;
127 # From http://python-future.org/compatible_idioms.html
128 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
129 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
130 from urllib
.error
import HTTPError
, URLError
;
131 import urllib
.parse
as urlparse
;
132 import http
.cookiejar
as cookielib
;
133 from http
.client
import HTTPConnection
, HTTPSConnection
;
135 __program_name__
= "PyWWW-Get";
136 __program_alt_name__
= "PyWWWGet";
137 __program_small_name__
= "wwwget";
138 __project__
= __program_name__
;
139 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
140 __version_info__
= (2, 0, 2, "RC 1", 1);
141 __version_date_info__
= (2023, 10, 5, "RC 1", 1);
142 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
143 __revision__
= __version_info__
[3];
144 __revision_id__
= "$Id$";
145 if(__version_info__
[4] is not None):
146 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
147 if(__version_info__
[4] is None):
148 __version_date_plusrc__
= __version_date__
;
149 if(__version_info__
[3] is not None):
150 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
151 if(__version_info__
[3] is None):
152 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
154 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
156 pytempdir
= tempfile
.gettempdir();
# Pointer-width of the running interpreter, normalized to "32" or "64".
# BUGFIX: platform.architecture() returns a (bits, linkage) tuple, so the
# original comparison PyBitness=="32bit" could never match; index [0] to
# get the bits string before comparing.
PyBitness = platform.architecture()[0];
if(PyBitness=="32bit" or PyBitness=="32"):
    PyBitness = "32";
elif(PyBitness=="64bit" or PyBitness=="64"):
    PyBitness = "64";
166 compression_supported_list
= ['identity', 'gzip', 'deflate', 'bzip2'];
168 compression_supported_list
.append('br');
170 compression_supported_list
.append('zstd');
172 compression_supported_list
.append('lzma');
173 compression_supported_list
.append('xz');
174 compression_supported
= ', '.join(compression_supported_list
);
# Shared cookie jar used by the download helpers.
geturls_cj = cookielib.CookieJar();
# Windows User-Agent fragments plus the matching client-hint header sets.
# BUGFIX: each addon dict previously listed 'SEC-CH-UA-PLATFORM' twice, so
# the "Windows" value was silently overwritten by the version string; the
# second key is the platform *version* hint, 'SEC-CH-UA-PLATFORM-VERSION'.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): version "5.1.0" for NT 5.2 looks like a copy-paste slip
# (expected "5.2.0") — value kept as-is to preserve behavior; confirm.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Full User-Agent strings for a range of desktop browsers on Windows 7.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# This tool's own honest User-Agent, advertising project name/version/URL.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Name of the running Python implementation ("CPython", "PyPy", ...),
# falling back to plain "Python" when the platform module reports nothing.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
else:
    py_implementation = "Python";
# Alternate self-identifying User-Agent carrying OS/arch/interpreter info.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot User-Agent strings (current and legacy forms).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used by the downloader.
geturls_ua = geturls_ua_firefox_windows7;
# Ready-made request-header dictionaries, one per impersonated browser.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Headers for the tool's own User-Agents; Accept-Encoding is "none" here so
# responses arrive uncompressed.
# BUGFIX: these two dicts listed 'SEC-CH-UA-PLATFORM' twice, clobbering the
# implementation name with the version; the second occurrence is renamed to
# 'SEC-CH-UA-PLATFORM-VERSION'.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Defaults for the downloader.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
234 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
235 if(outtype
=="print" and dbgenable
):
238 elif(outtype
=="log" and dbgenable
):
239 logging
.info(dbgtxt
);
241 elif(outtype
=="warning" and dbgenable
):
242 logging
.warning(dbgtxt
);
244 elif(outtype
=="error" and dbgenable
):
245 logging
.error(dbgtxt
);
247 elif(outtype
=="critical" and dbgenable
):
248 logging
.critical(dbgtxt
);
250 elif(outtype
=="exception" and dbgenable
):
251 logging
.exception(dbgtxt
);
253 elif(outtype
=="logalt" and dbgenable
):
254 logging
.log(dgblevel
, dbgtxt
);
256 elif(outtype
=="debug" and dbgenable
):
257 logging
.debug(dbgtxt
);
265 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
266 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
def add_url_param(url, **params):
    """Return *url* with the keyword arguments merged into its query string.

    Existing query parameters are preserved; same-named keyword arguments
    overwrite them.
    """
    n = 3;  # index of the query component in urlsplit()'s 5-tuple
    parts = list(urlparse.urlsplit(url));
    d = dict(parse_qsl(parts[n])); # use cgi.parse_qs for list values
    d.update(params);
    parts[n] = urlencode(d);
    return urlparse.urlunsplit(parts);
# Make executables that sit next to this script, or in the current working
# directory, findable through PATH lookups.
_path_additions = [os.path.dirname(os.path.realpath(__file__)), os.getcwd()];
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"]] + _path_additions);
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None (implicitly, when nothing matches) if the file is not
    found on the search path.
    """
    # BUGFIX: split on os.pathsep (":" on POSIX, ";" on Windows) and join
    # with os.path.join instead of the hard-coded ":" and "/", matching how
    # PATH is assembled just above; the original form broke on Windows.
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    return None;
285 def listize(varlist
):
293 newlistreg
.update({ilx
: varlist
[il
]});
294 newlistrev
.update({varlist
[il
]: ilx
});
297 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
300 def twolistize(varlist
):
310 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
311 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
312 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
313 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
316 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
317 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
318 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
321 def arglistize(proexec
, *varlist
):
325 newarglist
= [proexec
];
327 if varlist
[il
][0] is not None:
328 newarglist
.append(varlist
[il
][0]);
329 if varlist
[il
][1] is not None:
330 newarglist
.append(varlist
[il
][1]);
def fix_header_names(header_dict):
    """Return *header_dict* with every header name normalized to
    Title-Case via str.title() (e.g. "content-type" -> "Content-Type").

    Values are left untouched; a new dict is built, the argument is not
    mutated.
    """
    if(sys.version[0]=="2"):
        # Python 2: dicts expose iteritems().
        header_dict = {k.title(): v for k, v in header_dict.iteritems()};
    if(sys.version[0]>="3"):
        header_dict = {k.title(): v for k, v in header_dict.items()};
    return header_dict;
341 # hms_string by ArcGIS Python Recipes
342 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a number of elapsed seconds as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / 3600);
    leftover = sec_elapsed % 3600;
    minutes = int(leftover / 60);
    seconds = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds);
349 # get_readable_size by Lipis
350 # http://stackoverflow.com/posts/14998888/revisions
351 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
353 if(unit
!="IEC" and unit
!="SI"):
356 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
357 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
360 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
361 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
366 if abs(bytes
) < unitsize
:
367 strformat
= "%3."+str(precision
)+"f%s";
368 pre_return_val
= (strformat
% (bytes
, unit
));
369 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
370 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
371 alt_return_val
= pre_return_val
.split();
372 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
375 strformat
= "%."+str(precision
)+"f%s";
376 pre_return_val
= (strformat
% (bytes
, "YiB"));
377 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
378 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
379 alt_return_val
= pre_return_val
.split();
380 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
383 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
385 usehashtypes
= usehashtypes
.lower();
386 getfilesize
= os
.path
.getsize(infile
);
387 return_val
= get_readable_size(getfilesize
, precision
, unit
);
389 hashtypelist
= usehashtypes
.split(",");
390 openfile
= open(infile
, "rb");
391 filecontents
= openfile
.read();
394 listnumend
= len(hashtypelist
);
395 while(listnumcount
< listnumend
):
396 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
397 hashtypelistup
= hashtypelistlow
.upper();
398 filehash
= hashlib
.new(hashtypelistup
);
399 filehash
.update(filecontents
);
400 filegethash
= filehash
.hexdigest();
401 return_val
.update({hashtypelistup
: filegethash
});
405 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
407 usehashtypes
= usehashtypes
.lower();
408 getfilesize
= len(instring
);
409 return_val
= get_readable_size(getfilesize
, precision
, unit
);
411 hashtypelist
= usehashtypes
.split(",");
413 listnumend
= len(hashtypelist
);
414 while(listnumcount
< listnumend
):
415 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
416 hashtypelistup
= hashtypelistlow
.upper();
417 filehash
= hashlib
.new(hashtypelistup
);
418 if(sys
.version
[0]=="2"):
419 filehash
.update(instring
);
420 if(sys
.version
[0]>="3"):
421 filehash
.update(instring
.encode('utf-8'));
422 filegethash
= filehash
.hexdigest();
423 return_val
.update({hashtypelistup
: filegethash
});
427 def http_status_to_reason(code
):
430 101: 'Switching Protocols',
435 203: 'Non-Authoritative Information',
437 205: 'Reset Content',
438 206: 'Partial Content',
440 208: 'Already Reported',
442 300: 'Multiple Choices',
443 301: 'Moved Permanently',
448 307: 'Temporary Redirect',
449 308: 'Permanent Redirect',
452 402: 'Payment Required',
455 405: 'Method Not Allowed',
456 406: 'Not Acceptable',
457 407: 'Proxy Authentication Required',
458 408: 'Request Timeout',
461 411: 'Length Required',
462 412: 'Precondition Failed',
463 413: 'Payload Too Large',
465 415: 'Unsupported Media Type',
466 416: 'Range Not Satisfiable',
467 417: 'Expectation Failed',
468 421: 'Misdirected Request',
469 422: 'Unprocessable Entity',
471 424: 'Failed Dependency',
472 426: 'Upgrade Required',
473 428: 'Precondition Required',
474 429: 'Too Many Requests',
475 431: 'Request Header Fields Too Large',
476 451: 'Unavailable For Legal Reasons',
477 500: 'Internal Server Error',
478 501: 'Not Implemented',
480 503: 'Service Unavailable',
481 504: 'Gateway Timeout',
482 505: 'HTTP Version Not Supported',
483 506: 'Variant Also Negotiates',
484 507: 'Insufficient Storage',
485 508: 'Loop Detected',
487 511: 'Network Authentication Required'
489 return reasons
.get(code
, 'Unknown Status Code');
491 def ftp_status_to_reason(code
):
493 110: 'Restart marker reply',
494 120: 'Service ready in nnn minutes',
495 125: 'Data connection already open; transfer starting',
496 150: 'File status okay; about to open data connection',
498 202: 'Command not implemented, superfluous at this site',
499 211: 'System status, or system help reply',
500 212: 'Directory status',
503 215: 'NAME system type',
504 220: 'Service ready for new user',
505 221: 'Service closing control connection',
506 225: 'Data connection open; no transfer in progress',
507 226: 'Closing data connection',
508 227: 'Entering Passive Mode',
509 230: 'User logged in, proceed',
510 250: 'Requested file action okay, completed',
511 257: '"PATHNAME" created',
512 331: 'User name okay, need password',
513 332: 'Need account for login',
514 350: 'Requested file action pending further information',
515 421: 'Service not available, closing control connection',
516 425: 'Can\'t open data connection',
517 426: 'Connection closed; transfer aborted',
518 450: 'Requested file action not taken',
519 451: 'Requested action aborted. Local error in processing',
520 452: 'Requested action not taken. Insufficient storage space in system',
521 500: 'Syntax error, command unrecognized',
522 501: 'Syntax error in parameters or arguments',
523 502: 'Command not implemented',
524 503: 'Bad sequence of commands',
525 504: 'Command not implemented for that parameter',
526 530: 'Not logged in',
527 532: 'Need account for storing files',
528 550: 'Requested action not taken. File unavailable',
529 551: 'Requested action aborted. Page type unknown',
530 552: 'Requested file action aborted. Exceeded storage allocation',
531 553: 'Requested action not taken. File name not allowed'
533 return reasons
.get(code
, 'Unknown Status Code');
535 def sftp_status_to_reason(code
):
539 2: 'SSH_FX_NO_SUCH_FILE',
540 3: 'SSH_FX_PERMISSION_DENIED',
542 5: 'SSH_FX_BAD_MESSAGE',
543 6: 'SSH_FX_NO_CONNECTION',
544 7: 'SSH_FX_CONNECTION_LOST',
545 8: 'SSH_FX_OP_UNSUPPORTED'
547 return reasons
.get(code
, 'Unknown Status Code');
549 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
550 if isinstance(headers
, dict):
552 if(sys
.version
[0]=="2"):
553 for headkey
, headvalue
in headers
.iteritems():
554 returnval
.append((headkey
, headvalue
));
555 if(sys
.version
[0]>="3"):
556 for headkey
, headvalue
in headers
.items():
557 returnval
.append((headkey
, headvalue
));
558 elif isinstance(headers
, list):
564 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
565 if isinstance(headers
, dict):
567 if(sys
.version
[0]=="2"):
568 for headkey
, headvalue
in headers
.iteritems():
569 returnval
.append(headkey
+": "+headvalue
);
570 if(sys
.version
[0]>="3"):
571 for headkey
, headvalue
in headers
.items():
572 returnval
.append(headkey
+": "+headvalue
);
573 elif isinstance(headers
, list):
579 def make_http_headers_from_pycurl_to_dict(headers
):
581 headers
= headers
.strip().split('\r\n');
582 for header
in headers
:
583 parts
= header
.split(': ', 1)
586 header_dict
[key
.title()] = value
;
589 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
590 if isinstance(headers
, list):
595 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
597 elif isinstance(headers
, dict):
603 def get_httplib_support(checkvalue
=None):
604 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
606 returnval
.append("ftp");
607 returnval
.append("httplib");
609 returnval
.append("httplib2");
610 returnval
.append("urllib");
612 returnval
.append("urllib3");
613 returnval
.append("request3");
614 returnval
.append("request");
616 returnval
.append("requests");
618 returnval
.append("aiohttp");
620 returnval
.append("httpx");
621 returnval
.append("httpx2");
623 returnval
.append("mechanize");
625 returnval
.append("pycurl");
626 if(hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
627 returnval
.append("pycurl2");
628 if(hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
629 returnval
.append("pycurl3");
631 returnval
.append("sftp");
633 returnval
.append("pysftp");
634 if(not checkvalue
is None):
635 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
636 checkvalue
= "urllib";
637 if(checkvalue
=="httplib1"):
638 checkvalue
= "httplib";
639 if(checkvalue
in returnval
):
def check_httplib_support(checkvalue="urllib"):
    """Report whether the HTTP backend named *checkvalue* is available.

    Aliases "urllib1"/"urllib2" map to "urllib" and "httplib1" to
    "httplib" before delegating to get_httplib_support().
    """
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    if(checkvalue=="httplib1"):
        checkvalue = "httplib";
    returnval = get_httplib_support(checkvalue);
    return returnval;
def get_httplib_support_list():
    """Return the full list of available HTTP backend names."""
    returnval = get_httplib_support(None);
    return returnval;
657 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
658 global geturls_download_sleep
, havezstd
, havebrotli
, haveaiohttp
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
660 sleep
= geturls_download_sleep
;
663 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
664 httplibuse
= "urllib";
665 if(httplibuse
=="httplib1"):
666 httplibuse
= "httplib";
667 if(not haverequests
and httplibuse
=="requests"):
668 httplibuse
= "urllib";
669 if(not haveaiohttp
and httplibuse
=="aiohttp"):
670 httplibuse
= "urllib";
671 if(not havehttpx
and httplibuse
=="httpx"):
672 httplibuse
= "urllib";
673 if(not havehttpx
and httplibuse
=="httpx2"):
674 httplibuse
= "urllib";
675 if(not havehttpcore
and httplibuse
=="httpcore"):
676 httplibuse
= "urllib";
677 if(not havehttpcore
and httplibuse
=="httpcore2"):
678 httplibuse
= "urllib";
679 if(not havemechanize
and httplibuse
=="mechanize"):
680 httplibuse
= "urllib";
681 if(not havepycurl
and httplibuse
=="pycurl"):
682 httplibuse
= "urllib";
683 if(not havepycurl
and httplibuse
=="pycurl2"):
684 httplibuse
= "urllib";
685 if(havepycurl
and httplibuse
=="pycurl2" and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
686 httplibuse
= "pycurl";
687 if(not havepycurl
and httplibuse
=="pycurl3"):
688 httplibuse
= "urllib";
689 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
690 httplibuse
= "pycurl2";
691 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
692 httplibuse
= "pycurl";
693 if(not havehttplib2
and httplibuse
=="httplib2"):
694 httplibuse
= "httplib";
695 if(not haveparamiko
and httplibuse
=="sftp"):
697 if(not havepysftp
and httplibuse
=="pysftp"):
699 if(httplibuse
=="urllib" or httplibuse
=="request"):
700 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
701 elif(httplibuse
=="request"):
702 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
703 elif(httplibuse
=="request3"):
704 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
705 elif(httplibuse
=="httplib"):
706 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
707 elif(httplibuse
=="httplib2"):
708 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
709 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
710 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
711 elif(httplibuse
=="requests"):
712 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
713 elif(httplibuse
=="aiohttp"):
714 returnval
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
715 elif(httplibuse
=="httpx"):
716 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
717 elif(httplibuse
=="httpx2"):
718 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
719 elif(httplibuse
=="httpcore"):
720 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
721 elif(httplibuse
=="httpcore2"):
722 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
723 elif(httplibuse
=="mechanize"):
724 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
725 elif(httplibuse
=="pycurl"):
726 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
727 elif(httplibuse
=="pycurl2"):
728 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
729 elif(httplibuse
=="pycurl3"):
730 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
731 elif(httplibuse
=="ftp"):
732 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
733 elif(httplibuse
=="sftp"):
734 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
735 elif(httplibuse
=="pysftp"):
736 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_from_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in *httpurl* via download_from_url().

    *httpurl* may be a single URL string, or a list/tuple/dict of URLs
    (for a dict, its values are used).  All remaining arguments are
    forwarded unchanged to download_from_url() for each URL.

    Returns a list with one result (dict or False) per URL, in order.
    """
    if(isinstance(httpurl, dict)):
        # BUGFIX: dict.values() returns a non-indexable/one-shot view on
        # Python 3; materialize it so it can be iterated like a list.
        httpurl = list(httpurl.values())
    elif(not isinstance(httpurl, (list, tuple))):
        # A bare URL string becomes a one-element list.
        httpurl = [httpurl]
    returnval = []
    for cururl in httpurl:
        outputval = download_from_url(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize, sleep, timeout)
        returnval.append(outputval)
    return returnval
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* to a temporary file using the backend named by
    *httplibuse*, degrading to urllib/httplib when the requested backend
    is not installed.

    Returns the result dict of the chosen download_from_url_file_with_*
    helper, or False for an sftp/pysftp request whose support library is
    missing (or an unknown backend name).

    NOTE: the mutable default for *ranges* is kept for signature
    compatibility; it is only passed through, never mutated here.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # Alias names for the plain urllib/httplib backends.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Degrade gracefully when an optional backend is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl2"):
        httplibuse = "urllib"
    # pycurl2/pycurl3 need HTTP/2 / HTTP/3 support in the installed libcurl;
    # step down one protocol level when the constant is absent.
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havepycurl and httplibuse == "pycurl3"):
        httplibuse = "urllib"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    # BUGFIX: this previously tested haveparamiko; pysftp availability is
    # tracked by havepysftp (matching download_from_url_to_file).
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # Dispatch. The dead "request" branch was removed: "request" is folded
    # into "urllib" above and can never reach the chain.
    if(httplibuse == "urllib"):
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "request3"):
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "requests"):
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "aiohttp"):
        returnval = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx"):
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl"):
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl2"):
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl3"):
        returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download every URL in *httpurl* to a temporary file via
    download_from_url_file().

    *httpurl* may be a single URL string, or a list/tuple/dict of URLs
    (for a dict, its values are used).  All remaining arguments are
    forwarded unchanged for each URL.

    Returns a list with one result (dict or False) per URL, in order.
    """
    if(isinstance(httpurl, dict)):
        # BUGFIX: dict.values() returns a non-indexable/one-shot view on
        # Python 3; materialize it so it can be iterated like a list.
        httpurl = list(httpurl.values())
    elif(not isinstance(httpurl, (list, tuple))):
        # A bare URL string becomes a one-element list.
        httpurl = [httpurl]
    returnval = []
    for cururl in httpurl:
        outputval = download_from_url_file(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize, sleep, timeout)
        returnval.append(outputval)
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download *httpurl* to *outfile* under *outpath* ("-" means return the
    content instead of writing a named file) using the backend named by
    *httplibuse*, degrading to urllib/httplib when the requested backend
    is not installed.

    Returns the result dict of the chosen download_from_url_to_file_with_*
    helper, or False for an sftp/pysftp request whose support library is
    missing (or an unknown backend name).

    NOTE: the mutable defaults for *ranges*/*buffersize* are kept for
    signature compatibility; they are only passed through, never mutated.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # Alias names for the plain urllib/httplib backends.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Degrade gracefully when an optional backend is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl2"):
        httplibuse = "urllib"
    # pycurl2/pycurl3 need HTTP/2 / HTTP/3 support in the installed libcurl;
    # step down one protocol level when the constant is absent.
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havepycurl and httplibuse == "pycurl3"):
        httplibuse = "urllib"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # Dispatch. The dead "request" branch was removed: "request" is folded
    # into "urllib" above and can never reach the chain.
    # BUGFIX: the httpx/httpx2/httpcore/httpcore2 branches previously
    # omitted outfile/outpath, shifting ranges/buffersize into the wrong
    # parameter positions of the *_to_file_with_* helpers.
    if(httplibuse == "urllib"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "request3"):
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "aiohttp"):
        returnval = download_from_url_to_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx"):
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl"):
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl2"):
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl3"):
        returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_to_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download every URL in *httpurl* to a file via
    download_from_url_to_file().

    *httpurl* may be a single URL string, or a list/tuple/dict of URLs
    (for a dict, its values are used).  All remaining arguments are
    forwarded unchanged for each URL.

    Returns a list with one result (dict or False) per URL, in order.
    """
    if(isinstance(httpurl, dict)):
        # BUGFIX: dict.values() returns a non-indexable/one-shot view on
        # Python 3; materialize it so it can be iterated like a list.
        httpurl = list(httpurl.values())
    elif(not isinstance(httpurl, (list, tuple))):
        # A bare URL string becomes a one-element list.
        httpurl = [httpurl]
    returnval = []
    for cururl in httpurl:
        outputval = download_from_url_to_file(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, outfile, outpath, ranges, buffersize, sleep, timeout)
        returnval.append(outputval)
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch *httpurl* with urllib/urllib2 and return a result dict with
    keys Type/Content/Contentsize/Headers/Version/Method/HeadersSent/
    URL/Code/Reason/HTTPLib, or False on URLError/socket.timeout.

    Basic-auth credentials embedded in the URL are converted into an
    Authorization header; gzip/deflate/br/zstd/lzma/bzip2 bodies are
    decompressed best-effort based on Content-Encoding.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — .update on a string.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: was httpuseragent.update(...) — .update on a string.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline user:pass in the URL becomes a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl)
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(geturls_request)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata)
        else:
            geturls_text = geturls_opener.open(geturls_request)
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry headers/body; keep the object.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    try:
        httpcodereason = geturls_text.reason
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode())
    try:
        httpversionout = geturls_text.version
    except AttributeError:
        httpversionout = "1.1"
    httpmethodout = geturls_request.get_method()
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if(sys.version[0] == "2"):
        # Python 2 httplib messages are not plain dicts; copy them key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard: Content-Length may be absent (downloadsize == 0).
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Best-effort transparent decompression based on Content-Encoding.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            # BUGFIX: was zstandard.error, which does not exist.
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was zstandard.error — wrong module for lzma failures.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was zstandard.error; bz2 raises OSError/ValueError.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch *httpurl* via download_from_url_with_urllib() and spool the
    body into a uniquely named temporary file.

    Returns a result dict describing the temp file (Type "File" plus the
    metadata from the underlying fetch), or False when the fetch failed.

    NOTE(review): *ranges* is accepted but not forwarded to the fetch —
    kept for signature parity with the other *_file_with_* backends.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Stamp the temp file with the server's Last-Modified time.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download *httpurl* with the urllib backend and either move the
    result to outpath/outfile (Type "File") or, when outfile is "-",
    return the bytes in memory (Type "Content").

    *buffersize* is a two-element list: [0] is the network read size,
    [1] the local copy read size.  Returns False on failure or when the
    target path is unusable.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Write to a named file under outpath.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified time on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict previously listed 'Method' twice; the second
        # entry ('Method': httpmethod) silently won, so only it is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Spool through a temp file, then read it back into memory.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib/http.client and return a result dict.

    The dict carries Type/Content/Contentsize/ContentsizeAlt/Headers/Version/
    Method/HeadersSent/URL/Code/Reason/HTTPLib keys; returns False when the
    scheme is unsupported or the connection fails.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: else-branch previously called httpuseragent.update(...) on a
        # string; both cases reduce to setting the header on httpheaders.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same copy-paste error as above (httpuseragent.update).
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            # BUG FIX: POST requests were previously sent with the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: http.client reports version as int 10/11, never the string "10".
    if(geturls_text.version in (10, "10")):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; copy them key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUG FIX: rewind before read(); read() after writing returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:  # BUG FIX: zstandard has no .error attribute
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: previously caught zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (ValueError, OSError):  # BUG FIX: previously caught zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib into a uniquely named temporary file.

    Returns a dict describing the file (Type/Filename/Filesize/Headers/...)
    or False when the underlying download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # BUG FIX: previously delegated to download_from_url_with_urllib, silently
    # bypassing the httplib implementation this wrapper exists for.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with httplib to outpath/outfile, or to memory when
    outfile is "-". Returns a result dict, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified timestamp on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: dict previously contained 'Method' twice; the duplicate
        # httpmethod entry silently overwrote the real method from upstream.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib2's timeout-aware connections and return a
    result dict (Type/Content/.../HTTPLib), or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: else-branch previously called httpuseragent.update(...) on a
        # string; both cases reduce to setting the header on httpheaders.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same copy-paste error as above (httpuseragent.update).
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            # BUG FIX: POST requests were previously sent with the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: the response reports version as int 10/11, never the string "10".
    if(geturls_text.version in (10, "10")):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; copy them key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUG FIX: rewind before read(); read() after writing returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:  # BUG FIX: zstandard has no .error attribute
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: previously caught zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (ValueError, OSError):  # BUG FIX: previously caught zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"}
    geturls_text.close()
    return returnval
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: result of the delegated call was never returned.
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httplib2 into a uniquely named temporary file.

    Returns a dict describing the file (Type/Filename/Filesize/Headers/...)
    or False when the underlying download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: result of the delegated call was never returned.
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with httplib2 to outpath/outfile, or to memory when
    outfile is "-". Returns a result dict, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified timestamp on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: dict previously contained 'Method' twice; the duplicate
        # httpmethod entry silently overwrote the real method from upstream.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        # BUG FIX: arguments were passed positionally in the wrong order
        # (buffersize where outfile belongs) and ranges was dropped entirely;
        # keyword arguments make the delegation order-proof.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        # BUG FIX: result of the delegated call was never returned.
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: delegate to the urllib implementation."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # BUG FIX: result of the delegated call was never returned.
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: delegate to the urllib file implementation."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # BUG FIX: result of the delegated call was never returned.
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend: delegate to the urllib to-file implementation."""
    # BUG FIX: arguments were passed positionally in the wrong order
    # (buffersize where outfile belongs) and ranges was dropped entirely;
    # keyword arguments make the delegation order-proof.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    # BUG FIX: result of the delegated call was never returned.
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the requests library and return a result dict
    (Type/Content/.../HTTPLib), or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: else-branch previously called httpuseragent.update(...) on a
        # string; both cases reduce to setting the header on httpheaders.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same copy-paste error as above (httpuseragent.update).
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        reqsession = requests.Session()
        if(httpmethod == "POST"):
            geturls_text = reqsession.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
        else:
            geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie, stream=True)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUG FIX: requests.exceptions.ConnectError does not exist; the real
    # connection-failure exception is ConnectionError.
    except requests.exceptions.ConnectionError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    # BUG FIX: urllib3 reports raw.version as int 10/11, never the string "10".
    if(geturls_text.raw.version in (10, "10")):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; copy them key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.raw.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUG FIX: rewind before read(); read() after writing returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:  # BUG FIX: zstandard has no .error attribute
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: previously caught zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (ValueError, OSError):  # BUG FIX: previously caught zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests package is unavailable: delegate
        the whole-content download to the urllib implementation.

        Same contract as the real implementation: returns the result dict
        produced by download_from_url_with_urllib, or False on failure.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Fix: the visible original assigned the delegate's result but fell
        # off the end of the function, implicitly returning None.
        return returnval
1867 def download_from_url_file_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
1868 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
1869 exec_time_start
= time
.time();
1870 myhash
= hashlib
.new("sha1");
1871 if(sys
.version
[0]=="2"):
1872 myhash
.update(httpurl
);
1873 myhash
.update(str(buffersize
));
1874 myhash
.update(str(exec_time_start
));
1875 if(sys
.version
[0]>="3"):
1876 myhash
.update(httpurl
.encode('utf-8'));
1877 myhash
.update(str(buffersize
).encode('utf-8'));
1878 myhash
.update(str(exec_time_start
).encode('utf-8'));
1879 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
1881 sleep
= geturls_download_sleep
;
1884 pretmpfilename
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
1885 if(not pretmpfilename
):
1887 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
1888 tmpfilename
= f
.name
;
1890 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1891 except AttributeError:
1893 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1898 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1899 f
.write(pretmpfilename
.get('Content'));
1901 exec_time_end
= time
.time();
1902 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
1903 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests package is unavailable: delegate
        the download-to-temp-file operation to the urllib implementation.
        Returns the delegate's result dict, or False on failure.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Fix: the visible original never returned the delegate's result.
        return returnval
1912 def download_from_url_to_file_with_requests(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
1913 global geturls_download_sleep
, havezstd
, havebrotli
;
1915 sleep
= geturls_download_sleep
;
1918 if(not outfile
=="-"):
1919 outpath
= outpath
.rstrip(os
.path
.sep
);
1920 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
1921 if(not os
.path
.exists(outpath
)):
1922 os
.makedirs(outpath
);
1923 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
1925 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
1927 pretmpfilename
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
1928 if(not pretmpfilename
):
1930 tmpfilename
= pretmpfilename
.get('Filename');
1931 downloadsize
= int(os
.path
.getsize(tmpfilename
));
1933 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
1934 exec_time_start
= time
.time();
1935 shutil
.move(tmpfilename
, filepath
);
1937 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1938 except AttributeError:
1940 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1945 exec_time_end
= time
.time();
1946 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
1947 if(os
.path
.exists(tmpfilename
)):
1948 os
.remove(tmpfilename
);
1949 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1951 pretmpfilename
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
1952 tmpfilename
= pretmpfilename
.get('Filename');
1953 downloadsize
= int(os
.path
.getsize(tmpfilename
));
1956 exec_time_start
= time
.time();
1957 with
open(tmpfilename
, 'rb') as ft
:
1960 databytes
= ft
.read(buffersize
[1]);
1961 if not databytes
: break;
1962 datasize
= len(databytes
);
1963 fulldatasize
= datasize
+ fulldatasize
;
1966 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1967 downloaddiff
= fulldatasize
- prevdownsize
;
1968 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1969 prevdownsize
= fulldatasize
;
1972 fdata
= f
.getvalue();
1975 os
.remove(tmpfilename
);
1976 exec_time_end
= time
.time();
1977 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1978 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the requests package is unavailable: delegate
        the download-to-file operation to the urllib implementation.
        Returns the delegate's result dict, or False on failure.
        """
        # Fix: the original passed (postdata, buffersize, outfile, outpath,
        # sleep, timeout) positionally, which misaligns with the to-file
        # signature order (outfile/outpath/ranges precede buffersize) and
        # silently dropped `ranges`; pass the trailing args by keyword, and
        # return the result (the original discarded it).
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
1987 def download_from_url_with_aiohttp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
1988 global geturls_download_sleep
, havezstd
, havebrotli
;
1990 sleep
= geturls_download_sleep
;
1993 urlparts
= urlparse
.urlparse(httpurl
);
1994 if(isinstance(httpheaders
, list)):
1995 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
1996 httpheaders
= fix_header_names(httpheaders
);
1997 if(httpuseragent
is not None):
1998 if('User-Agent' in httpheaders
):
1999 httpheaders
['User-Agent'] = httpuseragent
;
2001 httpuseragent
.update({'User-Agent': httpuseragent
});
2002 if(httpreferer
is not None):
2003 if('Referer' in httpheaders
):
2004 httpheaders
['Referer'] = httpreferer
;
2006 httpuseragent
.update({'Referer': httpreferer
});
2007 if(urlparts
.username
is not None or urlparts
.password
is not None):
2008 if(sys
.version
[0]=="2"):
2009 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
2010 if(sys
.version
[0]>="3"):
2011 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
2012 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
2014 if(postdata
is not None and not isinstance(postdata
, dict)):
2015 postdata
= urlencode(postdata
);
2017 reqsession
= aiohttp
.ClientSession(cookie_jar
=httpcookie
, headers
=httpheaders
, timeout
=timeout
, read_timeout
=timeout
, conn_timeout
=timeout
, read_bufsize
=buffersize
);
2018 if(httpmethod
=="GET"):
2019 geturls_text
= reqsession
.get(httpurl
);
2020 elif(httpmethod
=="POST"):
2021 geturls_text
= reqsession
.post(httpurl
, data
=postdata
);
2023 geturls_text
= reqsession
.get(httpurl
);
2024 except aiohttp
.exceptions
.ConnectTimeout
:
2025 log
.info("Error With URL "+httpurl
);
2027 except aiohttp
.exceptions
.ConnectError
:
2028 log
.info("Error With URL "+httpurl
);
2030 except socket
.timeout
:
2031 log
.info("Error With URL "+httpurl
);
2033 httpcodeout
= geturls_text
.status
;
2034 httpcodereason
= geturls_text
.reason
;
2035 httpversionout
= geturls_text
.version
;
2036 httpmethodout
= geturls_text
.method
;
2037 httpurlout
= geturls_text
.url
;
2038 httpheaderout
= geturls_text
.headers
;
2039 httpheadersentout
= geturls_text
.request_info
.headers
;
2040 if(isinstance(httpheaderout
, list)):
2041 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
2042 if(sys
.version
[0]=="2"):
2044 prehttpheaderout
= httpheaderout
;
2045 httpheaderkeys
= httpheaderout
.keys();
2046 imax
= len(httpheaderkeys
);
2050 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
2052 except AttributeError:
2054 httpheaderout
= fix_header_names(httpheaderout
);
2055 if(isinstance(httpheadersentout
, list)):
2056 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
2057 httpheadersentout
= fix_header_names(httpheadersentout
);
2058 downloadsize
= httpheaderout
.get('Content-Length');
2059 if(downloadsize
is not None):
2060 downloadsize
= int(downloadsize
);
2061 if downloadsize
is None: downloadsize
= 0;
2064 log
.info("Downloading URL "+httpurl
);
2065 with
BytesIO() as strbuf
:
2067 databytes
= geturls_text
.read(buffersize
);
2068 if not databytes
: break;
2069 datasize
= len(databytes
);
2070 fulldatasize
= datasize
+ fulldatasize
;
2073 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
2074 downloaddiff
= fulldatasize
- prevdownsize
;
2075 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
2076 prevdownsize
= fulldatasize
;
2077 strbuf
.write(databytes
);
2079 returnval_content
= strbuf
.read();
2080 if(httpheaderout
.get("Content-Encoding")=="gzip"):
2082 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
2085 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
2087 returnval_content
= zlib
.decompress(returnval_content
);
2090 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
2092 returnval_content
= brotli
.decompress(returnval_content
);
2093 except brotli
.error
:
2095 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
2097 returnval_content
= zstandard
.decompress(returnval_content
);
2098 except zstandard
.error
:
2100 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
2102 returnval_content
= lzma
.decompress(returnval_content
);
2103 except zstandard
.error
:
2105 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
2107 returnval_content
= bz2
.decompress(returnval_content
);
2108 except zstandard
.error
:
2110 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "aiohttp"};
2111 geturls_text
.close();
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is unavailable: delegate the
        whole-content download to the urllib implementation.
        Returns the delegate's result dict, or False on failure.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Fix: the visible original never returned the delegate's result.
        return returnval
2120 def download_from_url_file_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
2121 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
2122 exec_time_start
= time
.time();
2123 myhash
= hashlib
.new("sha1");
2124 if(sys
.version
[0]=="2"):
2125 myhash
.update(httpurl
);
2126 myhash
.update(str(buffersize
));
2127 myhash
.update(str(exec_time_start
));
2128 if(sys
.version
[0]>="3"):
2129 myhash
.update(httpurl
.encode('utf-8'));
2130 myhash
.update(str(buffersize
).encode('utf-8'));
2131 myhash
.update(str(exec_time_start
).encode('utf-8'));
2132 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
2134 sleep
= geturls_download_sleep
;
2137 pretmpfilename
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
2138 if(not pretmpfilename
):
2140 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
2141 tmpfilename
= f
.name
;
2143 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
2144 except AttributeError:
2146 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2151 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
2152 f
.write(pretmpfilename
.get('Content'));
2154 exec_time_end
= time
.time();
2155 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
2156 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is unavailable: delegate the
        download-to-temp-file operation to the urllib implementation.
        Returns the delegate's result dict, or False on failure.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Fix: the visible original never returned the delegate's result.
        return returnval
2165 def download_from_url_to_file_with_aiohttp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
2166 global geturls_download_sleep
, havezstd
, havebrotli
;
2168 sleep
= geturls_download_sleep
;
2171 if(not outfile
=="-"):
2172 outpath
= outpath
.rstrip(os
.path
.sep
);
2173 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
2174 if(not os
.path
.exists(outpath
)):
2175 os
.makedirs(outpath
);
2176 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
2178 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
2180 pretmpfilename
= download_from_url_file_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2181 if(not pretmpfilename
):
2183 tmpfilename
= pretmpfilename
.get('Filename');
2184 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2186 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
2187 exec_time_start
= time
.time();
2188 shutil
.move(tmpfilename
, filepath
);
2190 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
2191 except AttributeError:
2193 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2198 exec_time_end
= time
.time();
2199 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
2200 if(os
.path
.exists(tmpfilename
)):
2201 os
.remove(tmpfilename
);
2202 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
2204 pretmpfilename
= download_from_url_file_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2205 tmpfilename
= pretmpfilename
.get('Filename');
2206 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2209 exec_time_start
= time
.time();
2210 with
open(tmpfilename
, 'rb') as ft
:
2213 databytes
= ft
.read(buffersize
[1]);
2214 if not databytes
: break;
2215 datasize
= len(databytes
);
2216 fulldatasize
= datasize
+ fulldatasize
;
2219 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
2220 downloaddiff
= fulldatasize
- prevdownsize
;
2221 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
2222 prevdownsize
= fulldatasize
;
2225 fdata
= f
.getvalue();
2228 os
.remove(tmpfilename
);
2229 exec_time_end
= time
.time();
2230 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
2231 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when aiohttp is unavailable: delegate the
        download-to-file operation to the urllib implementation.
        Returns the delegate's result dict, or False on failure.
        """
        # Fix: the original passed (postdata, buffersize, outfile, outpath,
        # sleep, timeout) positionally — misaligned with the to-file
        # signature order and silently dropping `ranges`; pass the trailing
        # args by keyword, and return the result (the original discarded it).
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
2240 def download_from_url_with_httpx(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
2241 global geturls_download_sleep
, havezstd
, havebrotli
;
2243 sleep
= geturls_download_sleep
;
2246 urlparts
= urlparse
.urlparse(httpurl
);
2247 if(isinstance(httpheaders
, list)):
2248 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
2249 httpheaders
= fix_header_names(httpheaders
);
2250 if(httpuseragent
is not None):
2251 if('User-Agent' in httpheaders
):
2252 httpheaders
['User-Agent'] = httpuseragent
;
2254 httpuseragent
.update({'User-Agent': httpuseragent
});
2255 if(httpreferer
is not None):
2256 if('Referer' in httpheaders
):
2257 httpheaders
['Referer'] = httpreferer
;
2259 httpuseragent
.update({'Referer': httpreferer
});
2260 if(urlparts
.username
is not None or urlparts
.password
is not None):
2261 if(sys
.version
[0]=="2"):
2262 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
2263 if(sys
.version
[0]>="3"):
2264 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
2265 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
2267 if(postdata
is not None and not isinstance(postdata
, dict)):
2268 postdata
= urlencode(postdata
);
2270 if(httpmethod
=="GET"):
2271 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
2272 geturls_text
= httpx_pool
.get(httpurl
, timeout
=timeout
, headers
=httpheaders
, cookies
=httpcookie
);
2273 elif(httpmethod
=="POST"):
2274 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
2275 geturls_text
= httpx_pool
.post(httpurl
, timeout
=timeout
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
2277 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
2278 geturls_text
= httpx_pool
.get(httpurl
, timeout
=timeout
, headers
=httpheaders
, cookies
=httpcookie
);
2279 except httpx
.ConnectTimeout
:
2280 log
.info("Error With URL "+httpurl
);
2282 except httpx
.ConnectError
:
2283 log
.info("Error With URL "+httpurl
);
2285 except socket
.timeout
:
2286 log
.info("Error With URL "+httpurl
);
2288 httpcodeout
= geturls_text
.status_code
;
2290 httpcodereason
= geturls_text
.reason_phrase
;
2292 httpcodereason
= http_status_to_reason(geturls_text
.status_code
);
2293 httpversionout
= geturls_text
.http_version
;
2294 httpmethodout
= httpmethod
;
2295 httpurlout
= str(geturls_text
.url
);
2296 httpheaderout
= geturls_text
.headers
;
2297 httpheadersentout
= geturls_text
.request
.headers
;
2298 if(isinstance(httpheaderout
, list)):
2299 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
2300 if(sys
.version
[0]=="2"):
2302 prehttpheaderout
= httpheaderout
;
2303 httpheaderkeys
= httpheaderout
.keys();
2304 imax
= len(httpheaderkeys
);
2308 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
2310 except AttributeError:
2312 httpheaderout
= fix_header_names(httpheaderout
);
2313 if(isinstance(httpheadersentout
, list)):
2314 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
2315 httpheadersentout
= fix_header_names(httpheadersentout
);
2316 downloadsize
= httpheaderout
.get('Content-Length');
2317 if(downloadsize
is not None):
2318 downloadsize
= int(downloadsize
);
2319 if downloadsize
is None: downloadsize
= 0;
2322 log
.info("Downloading URL "+httpurl
);
2323 with
BytesIO() as strbuf
:
2325 databytes
= geturls_text
.read();
2326 if not databytes
: break;
2327 datasize
= len(databytes
);
2328 fulldatasize
= datasize
+ fulldatasize
;
2331 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
2332 downloaddiff
= fulldatasize
- prevdownsize
;
2333 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
2334 prevdownsize
= fulldatasize
;
2335 strbuf
.write(databytes
);
2338 returnval_content
= strbuf
.read();
2339 geturls_text
.close();
2340 if(httpheaderout
.get("Content-Encoding")=="gzip"):
2342 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
2345 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
2347 returnval_content
= zlib
.decompress(returnval_content
);
2350 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
2352 returnval_content
= brotli
.decompress(returnval_content
);
2353 except brotli
.error
:
2355 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
2357 returnval_content
= zstandard
.decompress(returnval_content
);
2358 except zstandard
.error
:
2360 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
2362 returnval_content
= lzma
.decompress(returnval_content
);
2363 except zstandard
.error
:
2365 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
2367 returnval_content
= bz2
.decompress(returnval_content
);
2368 except zstandard
.error
:
2370 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "httpx"};
2371 geturls_text
.close();
2375 def download_from_url_with_httpx(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
2376 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
)
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpx and spool the body
    into a uniquely named temporary file.

    Returns a result dict of Type "File" describing the temp file, or False
    when the underlying download failed.  Signature mirrors the other
    download_from_url_file_with_* backends.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    # BUG FIX: ranges used a shared mutable default ([None, None]); use a
    # None sentinel instead.  ranges is accepted only for interface parity.
    if(ranges is None):
        ranges = [None, None]
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 over url + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified stamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Header missing or parsedate_to_datetime unavailable; try the
            # RFC-1123 strptime fallback, ignore unparsable values.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative);
    # use end - start.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback implementation used when the httpx package is unavailable.

    Delegates to download_from_url_file_with_urllib and returns its result.
    NOTE(review): upstream this def is normally guarded by if(not havehttpx):
    — confirm the guard was lost in extraction.
    """
    # BUG FIX: mutable default for ranges replaced with a None sentinel.
    if(ranges is None):
        ranges = [None, None]
    # BUG FIX: the delegated result was never returned.
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile, or return the content in-memory
    when outfile is "-".

    Returns a result dict (Type "File" or Type "Content"), or False when the
    destination is invalid or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli
    # BUG FIX: mutable defaults ([None, None] / [524288, 524288]) replaced
    # with None sentinels.
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when outpath is a file or filepath is a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified time onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: duration was start - end (negative); use end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the dict literal listed 'Method' twice; only the second
        # value (httpmethod) survived, so keep that single entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    # BUG FIX: guard against ZeroDivisionError when the
                    # Content-Length was unknown (downloadsize == 0).
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Fallback implementation used when the httpx package is unavailable.

    Delegates to download_from_url_to_file_with_urllib and returns its result.
    NOTE(review): 'ranges' is not forwarded and the positional order below
    mirrors the original call — confirm against the urllib callee signature.
    """
    # BUG FIX: mutable defaults replaced with None sentinels.
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    # BUG FIX: the delegated result was never returned.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpx (HTTP/1.1 + HTTP/2) and return a result dict
    of Type "Content" (decoded body, headers, status), or False on
    connection errors.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: the original updated httpuseragent (a string) instead
            # of the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-target update as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod == "POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # Unknown methods fall back to GET, matching the other backends.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: materialize header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # BUG FIX: httpx Response.read() returns the full (cached) body on
        # every call, so the original while/read loop could never terminate;
        # read the body once instead.
        databytes = geturls_text.read()
        if databytes:
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode the transfer encoding, best-effort per codec.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            # BUG FIX: the zstandard module raises ZstdError, not 'error'.
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUG FIX: was 'except zstandard.error', which can never catch
            # an lzma failure.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (IOError, OSError, ValueError):
            # BUG FIX: was 'except zstandard.error'; bz2 raises OSError/IOError
            # (or ValueError) on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"}
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback implementation used when the httpx package is unavailable.

    Delegates to the urllib backend and returns its result.
    NOTE(review): upstream this def is normally guarded by if(not havehttpx):
    — confirm the guard was lost in extraction.
    """
    # BUG FIX: the delegated result was never returned.
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpx2 and spool the body
    into a uniquely named temporary file.

    Returns a result dict of Type "File", or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    # BUG FIX: mutable default for ranges replaced with a None sentinel;
    # ranges is accepted only for interface parity.
    if(ranges is None):
        ranges = [None, None]
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 over url + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified stamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was start - end (negative); use end - start.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback implementation used when the httpx package is unavailable.

    Delegates to download_from_url_file_with_urllib and returns its result.
    NOTE(review): upstream this def is normally guarded by if(not havehttpx):
    — confirm the guard was lost in extraction.
    """
    # BUG FIX: mutable default for ranges replaced with a None sentinel.
    if(ranges is None):
        ranges = [None, None]
    # BUG FIX: the delegated result was never returned.
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile, or return the content in-memory
    when outfile is "-".

    Returns a result dict (Type "File" or Type "Content"), or False when the
    destination is invalid or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli
    # BUG FIX: mutable defaults replaced with None sentinels.
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when outpath is a file or filepath is a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified time onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: duration was start - end (negative); use end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: duplicate 'Method' key removed; the second value
        # (httpmethod) was the effective one, so keep it.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    # BUG FIX: guard against ZeroDivisionError when the
                    # Content-Length was unknown (downloadsize == 0).
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Fallback implementation used when the httpx package is unavailable.

    Delegates to download_from_url_to_file_with_urllib and returns its result.
    NOTE(review): 'ranges' is not forwarded and the positional order below
    mirrors the original call — confirm against the urllib callee signature.
    """
    # BUG FIX: mutable defaults replaced with None sentinels.
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    # BUG FIX: the delegated result was never returned.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpcore (HTTP/1.1 only) and return a result dict
    of Type "Content" (decoded body, headers, status), or False on
    connection errors.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: the original updated httpuseragent (a string) instead
            # of the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-target update as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            # BUG FIX: the POST branch issued a "GET" request and passed the
            # body via a nonexistent 'data' kwarg; httpcore.request() takes
            # the body through 'content'.
            # NOTE(review): postdata is a urlencoded str here; httpcore
            # expects bytes — confirm encoding expectations upstream.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to GET, matching the other backends.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: materialize header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # BUG FIX: httpcore Response.read() returns the full (cached) body on
        # every call, so the original while/read loop could never terminate;
        # read the body once instead.
        databytes = geturls_text.read()
        if databytes:
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode the transfer encoding, best-effort per codec.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            # BUG FIX: the zstandard module raises ZstdError, not 'error'.
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUG FIX: was 'except zstandard.error', which can never catch
            # an lzma failure.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (IOError, OSError, ValueError):
            # BUG FIX: was 'except zstandard.error'; bz2 raises OSError/IOError
            # (or ValueError) on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"}
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to urllib."""
        # BUG FIX: the delegated result was never returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpcore and spool the body
    into a uniquely named temporary file.

    Returns a result dict of Type "File", or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    # BUG FIX: mutable default for ranges replaced with a None sentinel;
    # ranges is accepted only for interface parity.
    if(ranges is None):
        ranges = [None, None]
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 over url + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified stamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was start - end (negative); use end - start.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to urllib."""
        # BUG FIX: mutable default for ranges replaced with a None sentinel.
        if(ranges is None):
            ranges = [None, None]
        # BUG FIX: the delegated result was never returned.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httpcore and either save it to outpath/outfile or,
    when outfile is "-", return the content in memory.

    Returns a result dict describing the file/content, or False on failure.
    NOTE(review): several control-flow lines (try/else/loop headers, returns)
    were missing from the damaged source; reconstructed from the sibling
    implementations in this file -- verify against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Save-to-disk path: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath exists but is a regular file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # destination name is taken by a directory
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the dict listed 'Method' twice; keep the winning httpmethod value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
    if(outfile == "-"):
        # In-memory path: download to a temp file, copy it into a buffer, delete it.
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUG FIX: duplicate 'Method' key removed (last value, httpmethod, wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate the save-to-file
        download to the urllib implementation and return its result dict.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpcore (HTTP/1 and HTTP/2 enabled) and return a
    result dict with the body, headers and status, or False on failure.

    NOTE(review): several control-flow lines (try/else/loop headers, returns)
    were missing from the damaged source; reconstructed from the sibling
    implementations in this file -- verify against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...) on a string; update the headers dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUG FIX: the POST branch issued a GET; send a real POST with the body.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # On Python 2, coerce the header mapping to a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode the body according to Content-Encoding.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: was zstandard.error, the wrong module's exception
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUG FIX: was zstandard.error; bz2 raises OSError/ValueError
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"}
    geturls_text.close()
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the urllib
        implementation and return its result dict.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httpcore (HTTP/1 + HTTP/2) into a uniquely-named
    temporary file and return a result dict describing it (False on failure).

    NOTE(review): try/else headers and return statements were missing from the
    damaged source; reconstructed from sibling implementations -- verify upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Preserve the server's Last-Modified timestamp on the temp file.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
        f.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate the file-style
        download to the urllib implementation and return its result dict.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httpcore (HTTP/1 + HTTP/2) and either save it to
    outpath/outfile or, when outfile is "-", return the content in memory.

    Returns a result dict describing the file/content, or False on failure.
    NOTE(review): several control-flow lines (try/else/loop headers, returns)
    were missing from the damaged source; reconstructed from the sibling
    implementations in this file -- verify against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Save-to-disk path: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath exists but is a regular file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # destination name is taken by a directory
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the dict listed 'Method' twice; keep the winning httpmethod value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
    if(outfile == "-"):
        # In-memory path: download to a temp file, copy it into a buffer, delete it.
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, always negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUG FIX: duplicate 'Method' key removed (last value, httpmethod, wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate the save-to-file
        download to the urllib implementation and return its result dict.
        """
        # NOTE(review): the `if(not havehttpcore):` guard was missing from the damaged
        # source; without it this fallback would unconditionally shadow the real
        # httpcore-backed implementation above. Restored to match every sibling
        # fallback in this file -- verify against upstream.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
if(haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Alias: "request3" downloads are served by the urllib3 implementation."""
        # NOTE(review): the `if(haveurllib3):` guard was missing from the damaged
        # source; restored to mirror the `if(not haveurllib3):` fallback below.
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation and return its result dict.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
if(haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Alias: "request3" file downloads are served by the urllib3 implementation."""
        # NOTE(review): the `if(haveurllib3):` guard was missing from the damaged
        # source; restored to mirror the `if(not haveurllib3):` fallback below.
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate the file-style
        download to the urllib implementation and return its result dict.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
if(haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Alias: "request3" save-to-file downloads are served by urllib3."""
        # NOTE(review): the `if(haveurllib3):` guard was missing from the damaged
        # source; restored to mirror the `if(not haveurllib3):` fallback below.
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate the save-to-file
        download to the urllib implementation and return its result dict.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib3 and return a result dict with the body,
    headers and status, or False on failure.

    NOTE(review): several control-flow lines (try/else/loop headers, returns)
    were missing from the damaged source; reconstructed from the sibling
    implementations in this file -- verify against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...) on a string; update the headers dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    # Use a distinct name so the numeric `timeout` parameter is not clobbered.
    pool_timeout = urllib3.util.Timeout(connect=timeout, read=timeout)
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=pool_timeout)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL " + httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    except ValueError:
        # NOTE(review): this except type was missing from the damaged source;
        # ValueError matches the sibling urllib3 helpers -- verify upstream.
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: urllib3 reports version as an int (10/11); the string compare
    # "10" never matched, so HTTP/1.0 responses were mislabelled as 1.1.
    if(str(geturls_text.version) == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # On Python 2, coerce the header mapping to a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the body according to Content-Encoding.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: was zstandard.error, the wrong module's exception
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUG FIX: was zstandard.error; bz2 raises OSError/ValueError
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation and return its result dict.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via urllib3 into a uniquely-named temporary file and
    return a result dict describing it (False on failure).

    NOTE(review): try/else headers and return statements were missing from the
    damaged source; reconstructed from sibling implementations -- verify upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Preserve the server's Last-Modified timestamp on the temp file.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
        f.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate the file-style
        download to the urllib implementation and return its result dict.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the visible text computed returnval but never returned it.
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl with urllib3 and either save it as outpath/outfile
    (returns a 'Type': "File" result dict) or, when outfile=="-", return the
    downloaded bytes in memory (a 'Type': "Content" result dict).

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns False when the destination is invalid or the download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # Save to disk: download to a temp file first, then move into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Best effort: stamp the moved file with the server's Last-Modified.
        try:
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple());
            os.utime(filepath, (lastmod, lastmod));
        except AttributeError:
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(filepath, (lastmod, lastmod));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # NOTE(review): start-end yields a negative interval; this matches
        # every sibling in the file — hms_string() presumably normalises it.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # Fix: the original dict listed 'Method' twice; only the second
        # ('Method': httpmethod) survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # In-memory mode: download to a temp file, then copy it into a buffer.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = int(0);
        prevdownsize = int(0);
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard against a zero-byte file before dividing.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """
        Fallback used when urllib3 is unavailable: delegate to the plain
        urllib implementation and return its result.
        """
        # Fix: the tail arguments were passed positionally in the wrong order
        # (buffersize where outfile belongs) and 'ranges' was dropped; pass
        # them by keyword so they land on the right parameters, and actually
        # return the result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        return returnval;
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl with mechanize and return a result dict holding the body
    under 'Content' plus status/header metadata ('Headers', 'Code', 'Reason',
    'URL', 'HeadersSent', ...), or False on a network error.

    Supports inline URL credentials (Basic auth), GET/POST, and transparent
    decoding of gzip/deflate/br/zstd/lzma/xz/bzip2 Content-Encoding.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fix: was httpuseragent.update(...) — a str has no .update();
            # the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fix: same as above, was written against httpuseragent.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = mechanize.Browser();
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_opener.addheaders = httpheaders;
    geturls_opener.set_cookiejar(httpcookie);
    geturls_opener.set_handle_robots(False);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata);
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = geturls_opener.open(httpurl);
    except mechanize.HTTPError as geturls_text_error:
        # HTTP error responses still carry usable status/headers/body.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.code;
    httpcodereason = geturls_text.msg;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    reqhead = geturls_opener.request;
    httpheadersentout = reqhead.header_items();
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: flatten the message object into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = int(0);
    prevdownsize = int(0);
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding the server applied;
        # decode failures leave the raw body in place (best effort).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # Fix: was except zstandard.error — the wrong module here.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (ValueError, OSError):
                # Fix: was except zstandard.error; bz2 raises ValueError/OSError.
                pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"};
    geturls_text.close();
    return returnval;
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """
        Fallback used when mechanize is unavailable: delegate to the urllib
        implementation with the same arguments and return its result.
        """
        # Fix: the result was assigned but never returned, so callers got None.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl with mechanize into a uniquely named temporary file.

    Returns a 'Type': "File" result dict whose 'Filename' points at the temp
    file (caller is responsible for removing it), or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    # Fix: stamp the Last-Modified time AFTER writing the content; the
    # original stamped first, so the write immediately clobbered the mtime.
    try:
        lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple());
        os.utime(tmpfilename, (lastmod, lastmod));
    except AttributeError:
        try:
            lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple());
            os.utime(tmpfilename, (lastmod, lastmod));
        except ValueError:
            pass;
    except ValueError:
        pass;
    exec_time_end = time.time();
    # NOTE(review): start-end is negative; matches every sibling in the file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """
        Fallback used when mechanize is unavailable: delegate to the urllib
        implementation with the same arguments and return its result.
        """
        # Fix: the result was assigned but never returned, so callers got None.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl with mechanize and either save it as outpath/outfile
    (returns a 'Type': "File" result dict) or, when outfile=="-", return the
    downloaded bytes in memory (a 'Type': "Content" result dict).

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns False when the destination is invalid or the download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # Save to disk: download to a temp file first, then move into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Best effort: stamp the moved file with the server's Last-Modified.
        try:
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple());
            os.utime(filepath, (lastmod, lastmod));
        except AttributeError:
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(filepath, (lastmod, lastmod));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # Fix: the original dict listed 'Method' twice; keep the surviving
        # ('Method': httpmethod) entry only.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # In-memory mode: download to a temp file, then copy it into a buffer.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = int(0);
        prevdownsize = int(0);
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # Fix: 'HeadersSent' was the literal list ['HeadersSent'] instead of
        # the headers actually sent; also drop the duplicate 'Method' key.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """
        Fallback used when mechanize is unavailable: delegate to the urllib
        implementation and return its result.
        """
        # Fix: the tail arguments were passed positionally in the wrong order
        # (buffersize where outfile belongs) and 'ranges' was dropped; pass
        # them by keyword, and actually return the result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        return returnval;
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl with pycurl and return a result dict holding the body
    under 'Content' plus status/header metadata ('Headers', 'Code', 'Reason',
    'URL', 'HeadersSent', ...), or False on a network error.

    Supports inline URL credentials (Basic auth), GET/POST, and transparent
    decoding of gzip/deflate/br/zstd/lzma/xz/bzip2 Content-Encoding.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fix: was httpuseragent.update(...) — a str has no .update();
            # the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fix: same as above, was written against httpuseragent.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    retrieved_body = BytesIO();
    retrieved_headers = BytesIO();
    def _new_curl_handle():
        # Curl options shared by every request method (the original repeated
        # this setup verbatim for GET, POST and the fallback branch).
        ch = pycurl.Curl();
        ch.setopt(ch.URL, httpurl);
        ch.setopt(ch.WRITEFUNCTION, retrieved_body.write);
        ch.setopt(ch.HTTPHEADER, httpheaders);
        ch.setopt(ch.HEADERFUNCTION, retrieved_headers.write);
        ch.setopt(ch.FOLLOWLOCATION, True);
        ch.setopt(ch.TIMEOUT, timeout);
        return ch;
    try:
        geturls_text = _new_curl_handle();
        if(httpmethod=="POST"):
            geturls_text.setopt(geturls_text.POST, True);
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
        geturls_text.perform();
        retrieved_headers.seek(0);
        if(sys.version[0]=="2"):
            pycurlhead = retrieved_headers.read();
        if(sys.version[0]>="3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8');
        # Parse "HTTP/x.y CODE REASON" from the first status line.
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
        retrieved_body.seek(0);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except ValueError:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
    httpversionout = pyhttpverinfo[0];
    httpmethodout = httpmethod;
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
    httpheaderout = pycurlheadersout;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        # Python 2: flatten the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = int(0);
    prevdownsize = int(0);
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
        # Transparently undo any Content-Encoding the server applied;
        # decode failures leave the raw body in place (best effort).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # Fix: was except zstandard.error — the wrong module here.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (ValueError, OSError):
                # Fix: was except zstandard.error; bz2 raises ValueError/OSError.
                pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"};
    geturls_text.close();
    return returnval;
if(not havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """
        Fallback used when pycurl is unavailable: delegate to the urllib
        implementation with the same arguments and return its result.
        """
        # Fix: this def appeared unconditionally, unconditionally clobbering
        # the real pycurl implementation defined above; restore the
        # availability guard used by every other fallback in this file, and
        # return the result instead of discarding it.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl with pycurl into a uniquely named temporary file.

    Returns a 'Type': "File" result dict whose 'Filename' points at the temp
    file (caller is responsible for removing it), or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    # Fix: stamp the Last-Modified time AFTER writing the content; the
    # original stamped first, so the write immediately clobbered the mtime.
    try:
        lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple());
        os.utime(tmpfilename, (lastmod, lastmod));
    except AttributeError:
        try:
            lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple());
            os.utime(tmpfilename, (lastmod, lastmod));
        except ValueError:
            pass;
    except ValueError:
        pass;
    exec_time_end = time.time();
    # NOTE(review): start-end is negative; matches every sibling in the file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
if(not havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """
        Fallback used when pycurl is unavailable: delegate to the urllib
        implementation with the same arguments and return its result.
        """
        # Fix: restore the availability guard (this def appeared
        # unconditionally, clobbering the real pycurl implementation) and
        # return the result instead of discarding it.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
4040 def download_from_url_to_file_with_pycurl(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
4041 global geturls_download_sleep
, havezstd
, havebrotli
;
4043 sleep
= geturls_download_sleep
;
4046 if(not outfile
=="-"):
4047 outpath
= outpath
.rstrip(os
.path
.sep
);
4048 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
4049 if(not os
.path
.exists(outpath
)):
4050 os
.makedirs(outpath
);
4051 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
4053 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
4055 pretmpfilename
= download_from_url_file_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4056 if(not pretmpfilename
):
4058 tmpfilename
= pretmpfilename
.get('Filename');
4059 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4061 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
4062 exec_time_start
= time
.time();
4063 shutil
.move(tmpfilename
, filepath
);
4065 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
4066 except AttributeError:
4068 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4073 exec_time_end
= time
.time();
4074 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
4075 if(os
.path
.exists(tmpfilename
)):
4076 os
.remove(tmpfilename
);
4077 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4079 pretmpfilename
= download_from_url_file_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4080 tmpfilename
= pretmpfilename
.get('Filename');
4081 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4084 exec_time_start
= time
.time();
4085 with
open(tmpfilename
, 'rb') as ft
:
4088 databytes
= ft
.read(buffersize
[1]);
4089 if not databytes
: break;
4090 datasize
= len(databytes
);
4091 fulldatasize
= datasize
+ fulldatasize
;
4094 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4095 downloaddiff
= fulldatasize
- prevdownsize
;
4096 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4097 prevdownsize
= fulldatasize
;
4100 fdata
= f
.getvalue();
4103 os
.remove(tmpfilename
);
4104 exec_time_end
= time
.time();
4105 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
4106 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub used when the real PycURL implementation is unavailable.

    Same signature as the PycURL-backed version; delegates the download
    to download_from_url_to_file_with_urllib and returns its result dict.
    """
    # Bug fix: the old positional call passed buffersize where the target
    # function expects outfile (the to-file downloaders declare
    # ..., postdata, outfile, outpath, ranges, buffersize, ...) and dropped
    # ranges entirely. Keyword arguments make the mapping explicit.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
    # Bug fix: previously the result was never returned (callers got None).
    return returnval;
4114 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
4115 def download_from_url_with_pycurl2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
4116 global geturls_download_sleep
, havezstd
, havebrotli
;
4118 sleep
= geturls_download_sleep
;
4121 urlparts
= urlparse
.urlparse(httpurl
);
4122 if(isinstance(httpheaders
, list)):
4123 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
4124 httpheaders
= fix_header_names(httpheaders
);
4125 if(httpuseragent
is not None):
4126 if('User-Agent' in httpheaders
):
4127 httpheaders
['User-Agent'] = httpuseragent
;
4129 httpuseragent
.update({'User-Agent': httpuseragent
});
4130 if(httpreferer
is not None):
4131 if('Referer' in httpheaders
):
4132 httpheaders
['Referer'] = httpreferer
;
4134 httpuseragent
.update({'Referer': httpreferer
});
4135 if(urlparts
.username
is not None or urlparts
.password
is not None):
4136 if(sys
.version
[0]=="2"):
4137 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
4138 if(sys
.version
[0]>="3"):
4139 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
4140 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
4141 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
4142 if(isinstance(httpheaders
, dict)):
4143 httpheaders
= make_http_headers_from_dict_to_pycurl(httpheaders
);
4144 geturls_opener
.addheaders
= httpheaders
;
4146 if(postdata
is not None and not isinstance(postdata
, dict)):
4147 postdata
= urlencode(postdata
);
4148 retrieved_body
= BytesIO();
4149 retrieved_headers
= BytesIO();
4151 if(httpmethod
=="GET"):
4152 geturls_text
= pycurl
.Curl();
4153 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
4154 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_2_0
);
4155 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
4156 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
4157 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
4158 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
4159 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4160 geturls_text
.perform();
4161 elif(httpmethod
=="POST"):
4162 geturls_text
= pycurl
.Curl();
4163 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
4164 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_2_0
);
4165 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
4166 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
4167 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
4168 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
4169 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4170 geturls_text
.setopt(geturls_text
.POST
, True);
4171 geturls_text
.setopt(geturls_text
.POSTFIELDS
, postdata
);
4172 geturls_text
.perform();
4174 geturls_text
= pycurl
.Curl();
4175 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
4176 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_2_0
);
4177 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
4178 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
4179 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
4180 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
4181 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4182 geturls_text
.perform();
4183 retrieved_headers
.seek(0);
4184 if(sys
.version
[0]=="2"):
4185 pycurlhead
= retrieved_headers
.read();
4186 if(sys
.version
[0]>="3"):
4187 pycurlhead
= retrieved_headers
.read().decode('UTF-8');
4188 pyhttpverinfo
= re
.findall(r
'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead
.splitlines()[0].strip())[0];
4189 pycurlheadersout
= make_http_headers_from_pycurl_to_dict(pycurlhead
);
4190 retrieved_body
.seek(0);
4191 except socket
.timeout
:
4192 log
.info("Error With URL "+httpurl
);
4194 except socket
.gaierror
:
4195 log
.info("Error With URL "+httpurl
);
4198 log
.info("Error With URL "+httpurl
);
4200 httpcodeout
= geturls_text
.getinfo(geturls_text
.HTTP_CODE
);
4201 httpcodereason
= http_status_to_reason(geturls_text
.getinfo(geturls_text
.HTTP_CODE
));
4202 httpversionout
= pyhttpverinfo
[0];
4203 httpmethodout
= httpmethod
;
4204 httpurlout
= geturls_text
.getinfo(geturls_text
.EFFECTIVE_URL
);
4205 httpheaderout
= pycurlheadersout
;
4206 httpheadersentout
= httpheaders
;
4207 if(isinstance(httpheaderout
, list)):
4208 httpheaderout
= dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout
)));
4209 if(sys
.version
[0]=="2"):
4211 prehttpheaderout
= httpheaderout
;
4212 httpheaderkeys
= httpheaderout
.keys();
4213 imax
= len(httpheaderkeys
);
4217 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
4219 except AttributeError:
4221 httpheaderout
= fix_header_names(httpheaderout
);
4222 if(isinstance(httpheadersentout
, list)):
4223 httpheadersentout
= dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout
)));
4224 httpheadersentout
= fix_header_names(httpheadersentout
);
4225 downloadsize
= httpheaderout
.get('Content-Length');
4226 if(downloadsize
is not None):
4227 downloadsize
= int(downloadsize
);
4228 if downloadsize
is None: downloadsize
= 0;
4231 log
.info("Downloading URL "+httpurl
);
4232 with
BytesIO() as strbuf
:
4234 databytes
= retrieved_body
.read(buffersize
);
4235 if not databytes
: break;
4236 datasize
= len(databytes
);
4237 fulldatasize
= datasize
+ fulldatasize
;
4240 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4241 downloaddiff
= fulldatasize
- prevdownsize
;
4242 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4243 prevdownsize
= fulldatasize
;
4244 strbuf
.write(databytes
);
4246 returnval_content
= strbuf
.read();
4247 if(httpheaderout
.get("Content-Encoding")=="gzip"):
4249 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
4252 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
4254 returnval_content
= zlib
.decompress(returnval_content
);
4257 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
4259 returnval_content
= brotli
.decompress(returnval_content
);
4260 except brotli
.error
:
4262 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
4264 returnval_content
= zstandard
.decompress(returnval_content
);
4265 except zstandard
.error
:
4267 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
4269 returnval_content
= lzma
.decompress(returnval_content
);
4270 except zstandard
.error
:
4272 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
4274 returnval_content
= bz2
.decompress(returnval_content
);
4275 except zstandard
.error
:
4277 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "pycurl2"};
4278 geturls_text
.close();
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub for the HTTP/2 downloader when PycURL is unavailable.

    Same signature as the PycURL HTTP/2 version; delegates to
    download_from_url_with_urllib and returns its result dict.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # Bug fix: the result was assigned but never returned.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/2 shim for PycURL builds that lack CURL_HTTP_VERSION_2_0.

        Consistency fix: the sibling shims (the *_file_* and *_to_file_*
        pycurl2 variants in this module) fall back to the plain PycURL
        implementation when PycURL is present but HTTP/2 is not; this one
        previously fell back to urllib instead. It now matches them.
        """
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        # Bug fix: the result was assigned but never returned.
        return returnval;
4291 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
4292 def download_from_url_file_with_pycurl2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
4293 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
4294 exec_time_start
= time
.time();
4295 myhash
= hashlib
.new("sha1");
4296 if(sys
.version
[0]=="2"):
4297 myhash
.update(httpurl
);
4298 myhash
.update(str(buffersize
));
4299 myhash
.update(str(exec_time_start
));
4300 if(sys
.version
[0]>="3"):
4301 myhash
.update(httpurl
.encode('utf-8'));
4302 myhash
.update(str(buffersize
).encode('utf-8'));
4303 myhash
.update(str(exec_time_start
).encode('utf-8'));
4304 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
4306 sleep
= geturls_download_sleep
;
4309 pretmpfilename
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
4310 if(not pretmpfilename
):
4312 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
4313 tmpfilename
= f
.name
;
4315 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
4316 except AttributeError:
4318 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4323 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4324 f
.write(pretmpfilename
.get('Content'));
4326 exec_time_end
= time
.time();
4327 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
4328 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub for the HTTP/2 file downloader when PycURL is unavailable.

    Same signature as the PycURL HTTP/2 version; delegates to
    download_from_url_file_with_urllib and returns its result dict.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    # Bug fix: the result was assigned but never returned.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/2 file-download shim for PycURL builds without CURL_HTTP_VERSION_2_0.

        Falls back to the plain (HTTP/1.x) PycURL file downloader with an
        identical signature and result dict.
        """
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        # Bug fix: the result was assigned but never returned.
        return returnval;
4341 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
4342 def download_from_url_to_file_with_pycurl2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
4343 global geturls_download_sleep
, havezstd
, havebrotli
;
4345 sleep
= geturls_download_sleep
;
4348 if(not outfile
=="-"):
4349 outpath
= outpath
.rstrip(os
.path
.sep
);
4350 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
4351 if(not os
.path
.exists(outpath
)):
4352 os
.makedirs(outpath
);
4353 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
4355 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
4357 pretmpfilename
= download_from_url_file_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4358 if(not pretmpfilename
):
4360 tmpfilename
= pretmpfilename
.get('Filename');
4361 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4363 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
4364 exec_time_start
= time
.time();
4365 shutil
.move(tmpfilename
, filepath
);
4367 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
4368 except AttributeError:
4370 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4375 exec_time_end
= time
.time();
4376 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
4377 if(os
.path
.exists(tmpfilename
)):
4378 os
.remove(tmpfilename
);
4379 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4381 pretmpfilename
= download_from_url_file_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4382 tmpfilename
= pretmpfilename
.get('Filename');
4383 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4386 exec_time_start
= time
.time();
4387 with
open(tmpfilename
, 'rb') as ft
:
4390 databytes
= ft
.read(buffersize
[1]);
4391 if not databytes
: break;
4392 datasize
= len(databytes
);
4393 fulldatasize
= datasize
+ fulldatasize
;
4396 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4397 downloaddiff
= fulldatasize
- prevdownsize
;
4398 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4399 prevdownsize
= fulldatasize
;
4402 fdata
= f
.getvalue();
4405 os
.remove(tmpfilename
);
4406 exec_time_end
= time
.time();
4407 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
4408 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub for the HTTP/2 to-file downloader when PycURL is unavailable.

    Same signature as the PycURL HTTP/2 version; delegates to
    download_from_url_to_file_with_urllib and returns its result dict.
    """
    # Bug fix: the old positional call passed buffersize where the target
    # expects outfile (declared order is ..., postdata, outfile, outpath,
    # ranges, buffersize, ...) and dropped ranges. Keywords make it explicit.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
    # Bug fix: previously the result was never returned (callers got None).
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/2 to-file shim for PycURL builds without CURL_HTTP_VERSION_2_0.

        Falls back to the plain (HTTP/1.x) PycURL to-file downloader with an
        identical signature and result dict.
        """
        # Bug fix: the old positional call forwarded (postdata, buffersize,
        # outfile, outpath, sleep, timeout) into a signature declared as
        # (..., postdata, outfile, outpath, ranges, buffersize, sleep, timeout),
        # so buffersize landed in the outfile slot and ranges was dropped.
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        # Bug fix: previously the result was never returned (callers got None).
        return returnval;
4421 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
4422 def download_from_url_with_pycurl3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
4423 global geturls_download_sleep
, havezstd
, havebrotli
;
4425 sleep
= geturls_download_sleep
;
4428 urlparts
= urlparse
.urlparse(httpurl
);
4429 if(isinstance(httpheaders
, list)):
4430 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
4431 httpheaders
= fix_header_names(httpheaders
);
4432 if(httpuseragent
is not None):
4433 if('User-Agent' in httpheaders
):
4434 httpheaders
['User-Agent'] = httpuseragent
;
4436 httpuseragent
.update({'User-Agent': httpuseragent
});
4437 if(httpreferer
is not None):
4438 if('Referer' in httpheaders
):
4439 httpheaders
['Referer'] = httpreferer
;
4441 httpuseragent
.update({'Referer': httpreferer
});
4442 if(urlparts
.username
is not None or urlparts
.password
is not None):
4443 if(sys
.version
[0]=="2"):
4444 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
4445 if(sys
.version
[0]>="3"):
4446 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
4447 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
4448 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
4449 if(isinstance(httpheaders
, dict)):
4450 httpheaders
= make_http_headers_from_dict_to_pycurl(httpheaders
);
4451 geturls_opener
.addheaders
= httpheaders
;
4453 if(postdata
is not None and not isinstance(postdata
, dict)):
4454 postdata
= urlencode(postdata
);
4455 retrieved_body
= BytesIO();
4456 retrieved_headers
= BytesIO();
4458 if(httpmethod
=="GET"):
4459 geturls_text
= pycurl
.Curl();
4460 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
4461 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_3_0
);
4462 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
4463 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
4464 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
4465 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
4466 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4467 geturls_text
.perform();
4468 elif(httpmethod
=="POST"):
4469 geturls_text
= pycurl
.Curl();
4470 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
4471 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_3_0
);
4472 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
4473 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
4474 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
4475 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
4476 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4477 geturls_text
.setopt(geturls_text
.POST
, True);
4478 geturls_text
.setopt(geturls_text
.POSTFIELDS
, postdata
);
4479 geturls_text
.perform();
4481 geturls_text
= pycurl
.Curl();
4482 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
4483 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_3_0
);
4484 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
4485 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
4486 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
4487 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
4488 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4489 geturls_text
.perform();
4490 retrieved_headers
.seek(0);
4491 if(sys
.version
[0]=="2"):
4492 pycurlhead
= retrieved_headers
.read();
4493 if(sys
.version
[0]>="3"):
4494 pycurlhead
= retrieved_headers
.read().decode('UTF-8');
4495 pyhttpverinfo
= re
.findall(r
'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead
.splitlines()[0].strip().rstrip('\r\n'))[0];
4496 pycurlheadersout
= make_http_headers_from_pycurl_to_dict(pycurlhead
);
4497 retrieved_body
.seek(0);
4498 except socket
.timeout
:
4499 log
.info("Error With URL "+httpurl
);
4501 except socket
.gaierror
:
4502 log
.info("Error With URL "+httpurl
);
4505 log
.info("Error With URL "+httpurl
);
4507 httpcodeout
= geturls_text
.getinfo(geturls_text
.HTTP_CODE
);
4508 httpcodereason
= http_status_to_reason(geturls_text
.getinfo(geturls_text
.HTTP_CODE
));
4509 httpversionout
= pyhttpverinfo
[0];
4510 httpmethodout
= httpmethod
;
4511 httpurlout
= geturls_text
.getinfo(geturls_text
.EFFECTIVE_URL
);
4512 httpheaderout
= pycurlheadersout
;
4513 httpheadersentout
= httpheaders
;
4514 if(isinstance(httpheaderout
, list)):
4515 httpheaderout
= dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout
)));
4516 if(sys
.version
[0]=="2"):
4518 prehttpheaderout
= httpheaderout
;
4519 httpheaderkeys
= httpheaderout
.keys();
4520 imax
= len(httpheaderkeys
);
4524 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
4526 except AttributeError:
4528 httpheaderout
= fix_header_names(httpheaderout
);
4529 if(isinstance(httpheadersentout
, list)):
4530 httpheadersentout
= dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout
)));
4531 httpheadersentout
= fix_header_names(httpheadersentout
);
4532 downloadsize
= httpheaderout
.get('Content-Length');
4533 if(downloadsize
is not None):
4534 downloadsize
= int(downloadsize
);
4535 if downloadsize
is None: downloadsize
= 0;
4538 log
.info("Downloading URL "+httpurl
);
4539 with
BytesIO() as strbuf
:
4541 databytes
= retrieved_body
.read(buffersize
);
4542 if not databytes
: break;
4543 datasize
= len(databytes
);
4544 fulldatasize
= datasize
+ fulldatasize
;
4547 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4548 downloaddiff
= fulldatasize
- prevdownsize
;
4549 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4550 prevdownsize
= fulldatasize
;
4551 strbuf
.write(databytes
);
4553 returnval_content
= strbuf
.read();
4554 if(httpheaderout
.get("Content-Encoding")=="gzip"):
4556 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
4559 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
4561 returnval_content
= zlib
.decompress(returnval_content
);
4564 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
4566 returnval_content
= brotli
.decompress(returnval_content
);
4567 except brotli
.error
:
4569 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
4571 returnval_content
= zstandard
.decompress(returnval_content
);
4572 except zstandard
.error
:
4574 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
4576 returnval_content
= lzma
.decompress(returnval_content
);
4577 except zstandard
.error
:
4579 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
4581 returnval_content
= bz2
.decompress(returnval_content
);
4582 except zstandard
.error
:
4584 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "pycurl3"};
4585 geturls_text
.close();
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub for the HTTP/3 downloader when PycURL is unavailable.

    Same signature as the PycURL HTTP/3 version; delegates to
    download_from_url_with_urllib and returns its result dict.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # Bug fix: the result was assigned but never returned.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 shim for PycURL builds that support HTTP/2 but not HTTP/3.

        Falls back to the PycURL HTTP/2 downloader with an identical
        signature and result dict.
        """
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        # Bug fix: the result was assigned but never returned.
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 shim for PycURL builds with neither HTTP/3 nor HTTP/2 support.

        Falls back to the plain (HTTP/1.x) PycURL downloader with an
        identical signature and result dict.
        """
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        # Bug fix: the result was assigned but never returned.
        return returnval;
4603 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
4604 def download_from_url_file_with_pycurl3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
4605 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
4606 exec_time_start
= time
.time();
4607 myhash
= hashlib
.new("sha1");
4608 if(sys
.version
[0]=="2"):
4609 myhash
.update(httpurl
);
4610 myhash
.update(str(buffersize
));
4611 myhash
.update(str(exec_time_start
));
4612 if(sys
.version
[0]>="3"):
4613 myhash
.update(httpurl
.encode('utf-8'));
4614 myhash
.update(str(buffersize
).encode('utf-8'));
4615 myhash
.update(str(exec_time_start
).encode('utf-8'));
4616 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
4618 sleep
= geturls_download_sleep
;
4621 pretmpfilename
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
4622 if(not pretmpfilename
):
4624 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
4625 tmpfilename
= f
.name
;
4627 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
4628 except AttributeError:
4630 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4635 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4636 f
.write(pretmpfilename
.get('Content'));
4638 exec_time_end
= time
.time();
4639 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
4640 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
4644 def download_from_url_file_with_pycurl3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
4645 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
, sleep
, timeout
)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback alias: HTTP/3 unavailable, HTTP/2 available — delegate
        the file-download variant to the pycurl2 implementation.
        """
        # BUG FIX (as visible): result was computed but never returned.
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback alias: neither HTTP/3 nor HTTP/2 available — delegate
        the file-download variant to the plain pycurl implementation.
        """
        # BUG FIX (as visible): result was computed but never returned.
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
4658 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
4659 def download_from_url_to_file_with_pycurl3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
4660 global geturls_download_sleep
, havezstd
, havebrotli
;
4662 sleep
= geturls_download_sleep
;
4665 if(not outfile
=="-"):
4666 outpath
= outpath
.rstrip(os
.path
.sep
);
4667 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
4668 if(not os
.path
.exists(outpath
)):
4669 os
.makedirs(outpath
);
4670 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
4672 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
4674 pretmpfilename
= download_from_url_file_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4675 if(not pretmpfilename
):
4677 tmpfilename
= pretmpfilename
.get('Filename');
4678 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4680 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
4681 exec_time_start
= time
.time();
4682 shutil
.move(tmpfilename
, filepath
);
4684 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
4685 except AttributeError:
4687 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4692 exec_time_end
= time
.time();
4693 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
4694 if(os
.path
.exists(tmpfilename
)):
4695 os
.remove(tmpfilename
);
4696 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4698 pretmpfilename
= download_from_url_file_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4699 tmpfilename
= pretmpfilename
.get('Filename');
4700 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4703 exec_time_start
= time
.time();
4704 with
open(tmpfilename
, 'rb') as ft
:
4707 databytes
= ft
.read(buffersize
[1]);
4708 if not databytes
: break;
4709 datasize
= len(databytes
);
4710 fulldatasize
= datasize
+ fulldatasize
;
4713 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4714 downloaddiff
= fulldatasize
- prevdownsize
;
4715 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4716 prevdownsize
= fulldatasize
;
4719 fdata
= f
.getvalue();
4722 os
.remove(tmpfilename
);
4723 exec_time_end
= time
.time();
4724 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
4725 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback alias used when pycurl is unavailable: delegate the
    save-to-file variant to the urllib implementation.

    BUG FIX: the original passed arguments positionally in the wrong order
    (postdata, buffersize, outfile, outpath, sleep, timeout), which shifted
    buffersize into the outfile slot and dropped `ranges` entirely.  Keyword
    arguments make the mapping explicit and correct.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback alias: HTTP/3 unavailable, HTTP/2 available — delegate
        the save-to-file variant to the pycurl2 implementation.

        BUG FIX: the original defined download_from_url_to_file_with_pycurl2
        here and called itself, causing infinite recursion AND clobbering the
        real pycurl2 implementation; per the sibling fallbacks (see the
        *_with_pycurl3 and *_file_with_pycurl3 blocks above) this branch must
        define the pycurl3 alias.  It also passed arguments positionally in
        the wrong order, dropping `ranges`; keywords fix the mapping.
        """
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback alias: neither HTTP/3 nor HTTP/2 available — delegate
        the save-to-file variant to the plain pycurl implementation.

        BUG FIX: the original defined download_from_url_to_file_with_pycurl
        here and called itself (infinite recursion, clobbering the real
        implementation); the sibling fallback blocks show this branch is
        meant to define the pycurl3 alias.  Arguments are passed by keyword
        because the original's positional order dropped `ranges` and shifted
        buffersize/outfile/outpath.
        """
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        return returnval;
def download_file_from_ftp_file(url):
    """Fetch the file named by an ftp:// or ftps:// URL and return it as a
    rewound, seekable BytesIO object, or False when the scheme is not
    FTP-based or the connection fails.

    Credentials come from the URL; when absent, anonymous login is used.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        # Not an FTP URL (includes http/https): nothing to do here.
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # BUG FIX: was "httpurl", a NameError — the parameter is "url".
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    # BUG FIX: login used urlparts.username/password directly (possibly
    # None), leaving the anonymous defaults computed above unused.
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();  # switch the data connection to TLS
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def download_file_from_ftp_string(url):
    """Download an FTP URL and return its contents as a byte string."""
    buffer_obj = download_file_from_ftp_file(url);
    return buffer_obj.read();
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download an ftp://-style URL into memory and return the module's
    standard result dict (Type/Content/Contentsize/...), or False when the
    transfer could not be started.

    The HTTP-style arguments (headers, user agent, referer, cookie, method,
    postdata, timeout) are accepted only for signature compatibility with
    the other download_from_url_with_* helpers; FTP uses the URL, the
    buffer size and the sleep interval.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUG FIX: was httpuseragent.update(...) — httpuseragent is a
            # string; the header dict is what must be updated.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUG FIX: was httpuseragent.update(...) — same defect as above.
            httpheaders.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_text = download_file_from_ftp_file(httpurl);
    if(not geturls_text):
        return False;
    downloadsize = None;  # FTP transfer size is not known up front
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None:
        downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes:
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard the division: downloadsize is 0 when unknown.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
4842 def download_from_url_file_with_ftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
4843 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
4844 exec_time_start
= time
.time();
4845 myhash
= hashlib
.new("sha1");
4846 if(sys
.version
[0]=="2"):
4847 myhash
.update(httpurl
);
4848 myhash
.update(str(buffersize
));
4849 myhash
.update(str(exec_time_start
));
4850 if(sys
.version
[0]>="3"):
4851 myhash
.update(httpurl
.encode('utf-8'));
4852 myhash
.update(str(buffersize
).encode('utf-8'));
4853 myhash
.update(str(exec_time_start
).encode('utf-8'));
4854 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
4856 sleep
= geturls_download_sleep
;
4859 pretmpfilename
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
4860 if(not pretmpfilename
):
4862 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
4863 tmpfilename
= f
.name
;
4865 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
4866 except AttributeError:
4868 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
4873 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4874 f
.write(pretmpfilename
.get('Content'));
4876 exec_time_end
= time
.time();
4877 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
4878 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
4881 def download_from_url_to_file_with_ftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
4882 global geturls_download_sleep
, havezstd
, havebrotli
;
4884 sleep
= geturls_download_sleep
;
4887 if(not outfile
=="-"):
4888 outpath
= outpath
.rstrip(os
.path
.sep
);
4889 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
4890 if(not os
.path
.exists(outpath
)):
4891 os
.makedirs(outpath
);
4892 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
4894 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
4896 pretmpfilename
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4897 if(not pretmpfilename
):
4899 tmpfilename
= pretmpfilename
.get('Filename');
4900 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4902 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
4903 exec_time_start
= time
.time();
4904 shutil
.move(tmpfilename
, filepath
);
4905 exec_time_end
= time
.time();
4906 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
4907 if(os
.path
.exists(tmpfilename
)):
4908 os
.remove(tmpfilename
);
4909 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
4911 pretmpfilename
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
4912 tmpfilename
= pretmpfilename
.get('Filename');
4913 downloadsize
= int(os
.path
.getsize(tmpfilename
));
4916 exec_time_start
= time
.time();
4917 with
open(tmpfilename
, 'rb') as ft
:
4920 databytes
= ft
.read(buffersize
[1]);
4921 if not databytes
: break;
4922 datasize
= len(databytes
);
4923 fulldatasize
= datasize
+ fulldatasize
;
4926 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4927 downloaddiff
= fulldatasize
- prevdownsize
;
4928 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4929 prevdownsize
= fulldatasize
;
4932 fdata
= f
.getvalue();
4935 os
.remove(tmpfilename
);
4936 exec_time_end
= time
.time();
4937 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
4938 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the readable file-like object `ftpfile` to the path named by
    an ftp:// or ftps:// URL.  Returns the rewound file object, or False
    when the scheme is not FTP-based or the connection fails.

    Credentials come from the URL; when absent, anonymous login is used.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # BUG FIX: was "httpurl", a NameError — the parameter is "url".
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    # BUG FIX: login used urlparts.username/password directly (possibly
    # None), leaving the anonymous defaults computed above unused.
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();  # switch the data connection to TLS
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string `ftpstring` to an FTP URL.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_ftp_file;
    returns that helper's result (the rewound buffer, or False on failure).
    """
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    # BUG FIX (as visible): the result was never returned to the caller.
    return ftpfile;
4989 def download_file_from_sftp_file(url
):
4990 urlparts
= urlparse
.urlparse(url
);
4991 file_name
= os
.path
.basename(urlparts
.path
);
4992 file_dir
= os
.path
.dirname(urlparts
.path
);
4993 if(urlparts
.scheme
=="http" or urlparts
.scheme
=="https"):
4995 sftp_port
= urlparts
.port
;
4996 if(urlparts
.port
is None):
4999 sftp_port
= urlparts
.port
;
5000 if(urlparts
.username
is not None):
5001 sftp_username
= urlparts
.username
;
5003 sftp_username
= "anonymous";
5004 if(urlparts
.password
is not None):
5005 sftp_password
= urlparts
.password
;
5006 elif(urlparts
.password
is None and urlparts
.username
=="anonymous"):
5007 sftp_password
= "anonymous";
5010 if(urlparts
.scheme
!="sftp"):
5012 ssh
= paramiko
.SSHClient();
5013 ssh
.load_system_host_keys();
5014 ssh
.set_missing_host_key_policy(paramiko
.AutoAddPolicy());
5016 ssh
.connect(urlparts
.hostname
, port
=sftp_port
, username
=urlparts
.username
, password
=urlparts
.password
);
5017 except paramiko
.ssh_exception
.SSHException
:
5019 except socket
.gaierror
:
5020 log
.info("Error With URL "+httpurl
);
5022 except socket
.timeout
:
5023 log
.info("Error With URL "+httpurl
);
5025 sftp
= ssh
.open_sftp();
5026 sftpfile
= BytesIO();
5027 sftp
.getfo(urlparts
.path
, sftpfile
);
5030 sftpfile
.seek(0, 0);
5033 def download_file_from_sftp_file(url
):
def download_file_from_sftp_string(url):
    """Download an SFTP URL and return its contents as a byte string."""
    buffer_obj = download_file_from_sftp_file(url);
    return buffer_obj.read();
5041 def download_file_from_ftp_string(url
):
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// URL into memory and return the module's standard
    result dict (Type/Content/Contentsize/...), or False when the transfer
    could not be started.

    The HTTP-style arguments are accepted only for signature compatibility
    with the other download_from_url_with_* helpers; SFTP uses the URL, the
    buffer size and the sleep interval.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUG FIX: was httpuseragent.update(...) — httpuseragent is a
            # string; the header dict is what must be updated.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUG FIX: was httpuseragent.update(...) — same defect as above.
            httpheaders.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_text = download_file_from_sftp_file(httpurl);
    if(not geturls_text):
        return False;
    downloadsize = None;  # SFTP transfer size is not known up front
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None:
        downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes:
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard the division: downloadsize is 0 when unknown.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stub used when paramiko is not installed: SFTP downloads are
        unsupported, so signal failure the way the other transports do.
        """
        return False;
5102 def download_from_url_file_with_sftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
5103 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
5104 exec_time_start
= time
.time();
5105 myhash
= hashlib
.new("sha1");
5106 if(sys
.version
[0]=="2"):
5107 myhash
.update(httpurl
);
5108 myhash
.update(str(buffersize
));
5109 myhash
.update(str(exec_time_start
));
5110 if(sys
.version
[0]>="3"):
5111 myhash
.update(httpurl
.encode('utf-8'));
5112 myhash
.update(str(buffersize
).encode('utf-8'));
5113 myhash
.update(str(exec_time_start
).encode('utf-8'));
5114 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
5116 sleep
= geturls_download_sleep
;
5119 pretmpfilename
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
5120 if(not pretmpfilename
):
5122 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
5123 tmpfilename
= f
.name
;
5125 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
5126 except AttributeError:
5128 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
5133 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
5134 f
.write(pretmpfilename
.get('Content'));
5136 exec_time_end
= time
.time();
5137 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
5138 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
5141 if(not haveparamiko
):
5142 def download_from_url_file_with_sftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP (paramiko) to outpath/outfile, or to memory.

    When outfile is "-" the temp download is copied into a BytesIO buffer and
    returned as a 'Content' dict; otherwise the temp file is moved into place
    and a 'File' dict is returned. Returns False on any failure.

    NOTE(review): interior lines of this function were dropped by the source
    extraction; the guard clauses and loop scaffolding below are reconstructed
    from the identical FTP/HTTP variants in this file — verify against VCS.

    Fixes vs. original:
      * duplicate 'Method' key ('Method': None clobbered the real value),
      * durations computed as start - end (always negative).
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Download to a temp file, then move it to the requested path.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # FIX: was hms_string(exec_time_start - exec_time_end) -> negative.
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # FIX: removed the duplicate 'Method': None entry that overwrote the
        # real method string in the original dict literal.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
    if outfile == "-":
        # Download to a temp file, copy it into memory, delete the temp file.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if downloadsize > 0:
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # FIX: same duplicate-'Method' and negative-duration defects as above.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub installed when paramiko is not importable.

        Same signature as the real implementation; always returns False.
        """
        # NOTE(review): elided body reconstructed as `return False` to match
        # the other have*-guarded fallbacks in this file.
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to an sftp:// URL via paramiko.

    Returns the rewound sftpfile on success, False on any failure
    (non-sftp scheme, connection/auth error, DNS failure, timeout).

    Fix vs. original: the except-path log calls referenced the undefined
    name `httpurl` (the parameter is `url`), which raised NameError instead
    of logging.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    # NOTE(review): default-port/credential fallbacks below reconstruct
    # elided lines from the matching FTP helper — verify against VCS.
    if urlparts.port is None:
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL " + url)  # FIX: was undefined `httpurl`
        return False
    except socket.timeout:
        log.info("Error With URL " + url)  # FIX: was undefined `httpurl`
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
if(not haveparamiko):
    # NOTE(review): the have*-guard line was elided by extraction; without it
    # this stub would clobber the real implementation — confirm against VCS.
    def upload_file_to_sftp_file(sftpfile, url):
        """Fallback stub when paramiko is unavailable; always returns False."""
        return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload a bytes payload to an sftp:// URL.

    Wraps the payload in a BytesIO and delegates to upload_file_to_sftp_file;
    returns its result (the rewound file object, or False on failure).

    Fix vs. original: the call was `upload_file_to_sftp_files(ftpfileo, url)`
    — a nonexistent function name and an undefined variable (`ftpfileo`
    instead of `sftpfileo`), so every call raised NameError.
    """
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    return sftpfile
if(not haveparamiko):
    # NOTE(review): guard line reconstructed (elided by extraction). The
    # original stub took only (url), so any caller using the real two-arg
    # signature crashed with TypeError; aligned to (sftpstring, url).
    def upload_file_to_sftp_string(sftpstring, url):
        """Fallback stub when paramiko is unavailable; always returns False."""
        return False
def download_file_from_pysftp_file(url):
    """Download an sftp:// URL via pysftp into a rewound BytesIO buffer.

    Returns the buffer on success, False on any failure (non-sftp scheme,
    connection/auth error, DNS failure, timeout).

    Fixes vs. original:
      * the pysftp.Connection(...) result was never assigned, and the code
        then called ssh.open_sftp() on the undefined name `ssh` (a paramiko
        copy-paste leftover) — NameError on every successful connect;
      * except-path logging referenced undefined `httpurl` (param is `url`).
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    sftp_port = urlparts.port
    # NOTE(review): default-port/credential fallbacks reconstruct elided
    # lines from the sibling FTP helper — verify against VCS.
    if urlparts.port is None:
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    try:
        # FIX: bind the connection; pysftp.Connection IS the SFTP session —
        # there is no paramiko-style ssh.open_sftp() step.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL " + url)  # FIX: was undefined `httpurl`
        return False
    except socket.timeout:
        log.info("Error With URL " + url)  # FIX: was undefined `httpurl`
        return False
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
if(not havepysftp):
    # NOTE(review): guard line reconstructed (elided by extraction) —
    # without it this stub would clobber the real implementation.
    def download_file_from_pysftp_file(url):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False
def download_file_from_pysftp_string(url):
    """Download an sftp:// URL via pysftp and return its content as bytes.

    Returns False when the underlying download fails.

    Fix vs. original: download_file_from_pysftp_file returns False on
    failure, and the original called .read() on it unconditionally,
    raising AttributeError instead of propagating the failure.
    """
    sftpfile = download_file_from_pysftp_file(url)
    if not sftpfile:
        return False
    return sftpfile.read()
if(not havepysftp):
    # NOTE(review): guard reconstructed (elided). The name looks like a
    # copy-paste slip — probably meant download_file_from_pysftp_string —
    # but it is kept as-is so existing callers don't break; confirm intent.
    def download_file_from_ftp_string(url):
        """Fallback stub; always returns False."""
        return False
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) and return a 'Content' result dict.

    Headers/cookies/method/postdata are accepted for interface parity with
    the HTTP downloaders; SFTP itself only uses the URL. Returns False on
    failure.

    NOTE(review): interior lines were dropped by extraction; guard clauses
    and loop scaffolding reconstructed from the FTP variant — verify.

    Fix vs. original: the content buffer was read without rewinding
    (strbuf.read() after the write loop), which returned b"" every time.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_pysftp_file(httpurl)
    if not geturls_text:
        return False
    # SFTP gives no Content-Length header; size is unknown up front.
    downloadsize = None
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0, 0)  # FIX: rewind before read, else content is empty
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not havepysftp):
    # NOTE(review): guard line reconstructed (elided by extraction).
    def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) into a uniquely-named temp file.

    Returns a 'File' result dict describing the temp file, or False on
    failure. The temp file name gets a SHA-1 suffix derived from the URL,
    buffer size, and start time so concurrent downloads don't collide.

    Fixes vs. original:
      * the call to download_from_url_with_pysftp passed httpuseragent and
        httpreferer — names that are neither parameters of this function nor
        accepted by the callee — raising NameError on every call;
      * DownloadTime was start - end (always negative);
      * os.path.getsize(tmpfilename) computed three times, hoisted once.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    # FIX: dropped undefined httpuseragent/httpreferer arguments.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified onto the temp file when the
            # result carries headers (pysftp results have Headers=None, in
            # which case .get on None raises AttributeError and is ignored).
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (AttributeError, ValueError):
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # FIX: duration was start - end (negative) in the original.
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    finalsize = os.path.getsize(tmpfilename)  # hoisted: was computed 3x
    returnval.update({'Filesize': finalsize, 'FilesizeAlt': {'IEC': get_readable_size(finalsize, 2, "IEC"), 'SI': get_readable_size(finalsize, 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havepysftp):
    # NOTE(review): guard line reconstructed (elided by extraction).
    def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) to outpath/outfile, or to memory.

    When outfile is "-" the temp download is copied into a BytesIO buffer and
    returned as a 'Content' dict; otherwise the temp file is moved into place
    and a 'File' dict is returned. Returns False on any failure.

    NOTE(review): interior lines were dropped by extraction; guard clauses
    and loop scaffolding reconstructed from the FTP/SFTP variants — verify.

    Fixes vs. original: duplicate 'Method' key ('Method': None clobbered the
    real value) and durations computed as start - end (always negative).
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Download to a temp file, then move it to the requested path.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # FIX: removed duplicate 'Method': None entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
    if outfile == "-":
        # Download to a temp file, copy it into memory, delete the temp file.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if downloadsize > 0:
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # FIX: same duplicate-'Method' and negative-duration defects as above.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not havepysftp):
    # NOTE(review): guard line reconstructed (elided by extraction).
    def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to an sftp:// URL via pysftp.

    Returns the rewound sftpfile on success, False on any failure.

    Fixes vs. original:
      * pysftp.Connection(...) result was never assigned and the code then
        called ssh.open_sftp() on undefined `ssh` (paramiko leftover);
      * except-path logging referenced undefined `httpurl` (param is `url`).
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    # NOTE(review): default-port/credential fallbacks reconstruct elided
    # lines from the sibling helpers — verify against VCS.
    if urlparts.port is None:
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    try:
        # FIX: bind the connection; it is itself the SFTP session.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL " + url)  # FIX: was undefined `httpurl`
        return False
    except socket.timeout:
        log.info("Error With URL " + url)  # FIX: was undefined `httpurl`
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
if(not havepysftp):
    # NOTE(review): guard line reconstructed (elided by extraction).
    def upload_file_to_pysftp_file(sftpfile, url):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload a bytes payload to an sftp:// URL via pysftp.

    Wraps the payload in a BytesIO and delegates to upload_file_to_pysftp_file;
    returns its result (the rewound file object, or False on failure).

    Fix vs. original: the call was `upload_file_to_pysftp_files(ftpfileo, url)`
    — a nonexistent function name and an undefined variable (`ftpfileo`
    instead of `sftpfileo`), so every call raised NameError.
    """
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    return sftpfile
5528 def upload_file_to_pysftp_string(url
):