4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
62 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
102 if(sys
.version
[0]=="2"):
104 from io
import StringIO
, BytesIO
;
107 from cStringIO
import StringIO
;
108 from cStringIO
import StringIO
as BytesIO
;
110 from StringIO
import StringIO
;
111 from StringIO
import StringIO
as BytesIO
;
112 # From http://python-future.org/compatible_idioms.html
113 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
114 from urllib
import urlencode
;
115 from urllib
import urlopen
as urlopenalt
;
116 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
117 import urlparse
, cookielib
;
118 from httplib
import HTTPConnection
, HTTPSConnection
;
119 if(sys
.version
[0]>="3"):
120 from io
import StringIO
, BytesIO
;
121 # From http://python-future.org/compatible_idioms.html
122 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
123 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
124 from urllib
.error
import HTTPError
, URLError
;
125 import urllib
.parse
as urlparse
;
126 import http
.cookiejar
as cookielib
;
127 from http
.client
import HTTPConnection
, HTTPSConnection
;
129 __program_name__
= "PyWWW-Get";
130 __program_alt_name__
= "PyWWWGet";
131 __program_small_name__
= "wwwget";
132 __project__
= __program_name__
;
133 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
134 __version_info__
= (2, 0, 2, "RC 1", 1);
135 __version_date_info__
= (2023, 10, 5, "RC 1", 1);
136 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
137 __revision__
= __version_info__
[3];
138 __revision_id__
= "$Id$";
139 if(__version_info__
[4] is not None):
140 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
141 if(__version_info__
[4] is None):
142 __version_date_plusrc__
= __version_date__
;
143 if(__version_info__
[3] is not None):
144 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
145 if(__version_info__
[3] is None):
146 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
148 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
150 pytempdir
= tempfile
.gettempdir();
152 PyBitness
= platform
.architecture();
153 if(PyBitness
=="32bit" or PyBitness
=="32"):
155 elif(PyBitness
=="64bit" or PyBitness
=="64"):
160 compression_supported_list
= ['identity', 'gzip', 'deflate', 'bzip2'];
162 compression_supported_list
.append('br');
164 compression_supported_list
.append('zstd');
166 compression_supported_list
.append('lzma');
167 compression_supported_list
.append('xz');
168 compression_supported
= ', '.join(compression_supported_list
);
170 geturls_cj
= cookielib
.CookieJar();
# Windows OS token strings for User-Agent headers, paired with the matching
# User-Agent Client Hints (Sec-CH-UA-*) request-header add-ons per OS version.
# BUGFIX: each addon dict previously listed 'SEC-CH-UA-PLATFORM' twice, so the
# platform name ("Windows") was silently overwritten by the version string;
# the version entry is now 'SEC-CH-UA-PLATFORM-VERSION' per the UA Client
# Hints specification.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): version "5.1.0" looks like a copy-paste from 32-bit XP above;
# XP x64 is NT 5.2 — confirm before changing the value.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
189 geturls_ua_firefox_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:109.0) Gecko/20100101 Firefox/117.0";
190 geturls_ua_seamonkey_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
191 geturls_ua_chrome_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
192 geturls_ua_chromium_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
193 geturls_ua_palemoon_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
194 geturls_ua_opera_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
195 geturls_ua_vivaldi_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
196 geturls_ua_internet_explorer_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; Trident/7.0; rv:11.0) like Gecko";
197 geturls_ua_microsoft_edge_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
198 geturls_ua_pywwwget_python
= "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname
=__project__
, prover
=__version__
, prourl
=__project_url__
);
199 if(platform
.python_implementation()!=""):
200 py_implementation
= platform
.python_implementation();
201 if(platform
.python_implementation()==""):
202 py_implementation
= "Python";
203 geturls_ua_pywwwget_python_alt
= "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver
=platform
.system()+" "+platform
.release(), archtype
=platform
.machine(), prourl
=__project_url__
, pyimp
=py_implementation
, pyver
=platform
.python_version(), proname
=__project__
, prover
=__version__
);
204 geturls_ua_googlebot_google
= "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
205 geturls_ua_googlebot_google_old
= "Googlebot/2.1 (+http://www.google.com/bot.html)";
206 geturls_ua
= geturls_ua_firefox_windows7
;
207 geturls_headers_firefox_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
208 geturls_headers_seamonkey_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
209 geturls_headers_chrome_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
210 geturls_headers_chrome_windows7
.update(windows7_ua_addon
);
211 geturls_headers_chromium_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
212 geturls_headers_chromium_windows7
.update(windows7_ua_addon
);
213 geturls_headers_palemoon_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
214 geturls_headers_opera_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
215 geturls_headers_opera_windows7
.update(windows7_ua_addon
);
216 geturls_headers_vivaldi_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
217 geturls_headers_vivaldi_windows7
.update(windows7_ua_addon
);
218 geturls_headers_internet_explorer_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
219 geturls_headers_microsoft_edge_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
220 geturls_headers_microsoft_edge_windows7
.update(windows7_ua_addon
);
# Self-identifying header sets for PyWWW-Get's own User-Agent.
# BUGFIX: both dicts previously listed 'SEC-CH-UA-PLATFORM' twice, so the
# implementation name was silently overwritten by the version string; the
# version entry is now 'SEC-CH-UA-PLATFORM-VERSION' per the UA Client Hints
# specification.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
223 geturls_headers_googlebot_google
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
224 geturls_headers_googlebot_google_old
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
225 geturls_headers
= geturls_headers_firefox_windows7
;
226 geturls_download_sleep
= 0;
228 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
229 if(outtype
=="print" and dbgenable
):
232 elif(outtype
=="log" and dbgenable
):
233 logging
.info(dbgtxt
);
235 elif(outtype
=="warning" and dbgenable
):
236 logging
.warning(dbgtxt
);
238 elif(outtype
=="error" and dbgenable
):
239 logging
.error(dbgtxt
);
241 elif(outtype
=="critical" and dbgenable
):
242 logging
.critical(dbgtxt
);
244 elif(outtype
=="exception" and dbgenable
):
245 logging
.exception(dbgtxt
);
247 elif(outtype
=="logalt" and dbgenable
):
248 logging
.log(dgblevel
, dbgtxt
);
250 elif(outtype
=="debug" and dbgenable
):
251 logging
.debug(dbgtxt
);
259 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
260 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
def add_url_param(url, **params):
    """Return *url* with **params merged into its query string.

    Existing query parameters are preserved (duplicate keys collapse to a
    single value); the supplied keyword arguments are added or override.
    """
    n = 3;  # index of the query component in urlsplit()'s 5-tuple
    parts = list(urlparse.urlsplit(url));
    # urlparse.parse_qsl replaces the deprecated cgi.parse_qsl (the cgi
    # module was removed in Python 3.13); use parse_qs for list values.
    d = dict(urlparse.parse_qsl(parts[n]));
    d.update(params);
    parts[n] = urlencode(d);
    return urlparse.urlunsplit(parts);
273 os
.environ
["PATH"] = os
.environ
["PATH"] + os
.pathsep
+ os
.path
.dirname(os
.path
.realpath(__file__
)) + os
.pathsep
+ os
.getcwd();
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile*; return its full path or None.

    BUGFIX: split on os.pathsep instead of a literal ":" — the literal colon
    broke Windows PATH values (";"-separated, drive letters contain ":") and
    was inconsistent with the os.pathsep used when PATH is extended above.
    Path components are joined with os.path.join instead of "/".
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    return None;  # was implicit before; made explicit for clarity
279 def listize(varlist
):
287 newlistreg
.update({ilx
: varlist
[il
]});
288 newlistrev
.update({varlist
[il
]: ilx
});
291 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
294 def twolistize(varlist
):
304 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
305 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
306 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
307 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
310 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
311 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
312 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
315 def arglistize(proexec
, *varlist
):
319 newarglist
= [proexec
];
321 if varlist
[il
][0] is not None:
322 newarglist
.append(varlist
[il
][0]);
323 if varlist
[il
][1] is not None:
324 newarglist
.append(varlist
[il
][1]);
def fix_header_names(header_dict):
    """Return *header_dict* with every header name normalized to Title-Case.

    Works on both Python 2 (iteritems) and Python 3 (items); values are
    passed through unchanged. BUGFIX: the extracted source dropped the
    return statement, leaving the function to return None; the normalized
    dict is now returned.
    """
    if(sys.version[0]=="2"):
        header_dict = {k.title(): v for k, v in header_dict.iteritems()};
    if(sys.version[0]>="3"):
        header_dict = {k.title(): v for k, v in header_dict.items()};
    return header_dict;
335 # hms_string by ArcGIS Python Recipes
336 # https://arcpy.wordpress.com/2012/04/20/146/
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Render an elapsed time in seconds as "H:MM:SS.ss"."""
    # Split the duration into whole hours, whole minutes, and leftover
    # fractional seconds.
    whole_hours = int(sec_elapsed / 3600);
    whole_minutes = int((sec_elapsed % 3600) / 60);
    leftover_seconds = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(whole_hours, whole_minutes, leftover_seconds);
343 # get_readable_size by Lipis
344 # http://stackoverflow.com/posts/14998888/revisions
345 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
347 if(unit
!="IEC" and unit
!="SI"):
350 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
351 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
354 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
355 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
360 if abs(bytes
) < unitsize
:
361 strformat
= "%3."+str(precision
)+"f%s";
362 pre_return_val
= (strformat
% (bytes
, unit
));
363 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
364 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
365 alt_return_val
= pre_return_val
.split();
366 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
369 strformat
= "%."+str(precision
)+"f%s";
370 pre_return_val
= (strformat
% (bytes
, "YiB"));
371 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
372 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
373 alt_return_val
= pre_return_val
.split();
374 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
377 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
379 usehashtypes
= usehashtypes
.lower();
380 getfilesize
= os
.path
.getsize(infile
);
381 return_val
= get_readable_size(getfilesize
, precision
, unit
);
383 hashtypelist
= usehashtypes
.split(",");
384 openfile
= open(infile
, "rb");
385 filecontents
= openfile
.read();
388 listnumend
= len(hashtypelist
);
389 while(listnumcount
< listnumend
):
390 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
391 hashtypelistup
= hashtypelistlow
.upper();
392 filehash
= hashlib
.new(hashtypelistup
);
393 filehash
.update(filecontents
);
394 filegethash
= filehash
.hexdigest();
395 return_val
.update({hashtypelistup
: filegethash
});
399 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
401 usehashtypes
= usehashtypes
.lower();
402 getfilesize
= len(instring
);
403 return_val
= get_readable_size(getfilesize
, precision
, unit
);
405 hashtypelist
= usehashtypes
.split(",");
407 listnumend
= len(hashtypelist
);
408 while(listnumcount
< listnumend
):
409 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
410 hashtypelistup
= hashtypelistlow
.upper();
411 filehash
= hashlib
.new(hashtypelistup
);
412 if(sys
.version
[0]=="2"):
413 filehash
.update(instring
);
414 if(sys
.version
[0]>="3"):
415 filehash
.update(instring
.encode('utf-8'));
416 filegethash
= filehash
.hexdigest();
417 return_val
.update({hashtypelistup
: filegethash
});
def http_status_to_reason(code):
    """Map an HTTP status code to its standard reason phrase.

    Covers the registered codes from RFC 9110 plus WebDAV (RFC 4918) and
    RFC 6585 additions; unknown codes yield 'Unknown Status Code'.
    """
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    };
    return reasons.get(code, 'Unknown Status Code');
def ftp_status_to_reason(code):
    """Map an FTP reply code (RFC 959) to its standard descriptive text.

    Unknown codes yield 'Unknown Status Code'.
    """
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    };
    return reasons.get(code, 'Unknown Status Code');
def sftp_status_to_reason(code):
    """Map an SFTP SSH_FXP_STATUS code to its SSH_FX_* symbolic name.

    Codes follow draft-ietf-secsh-filexfer; unknown codes yield
    'Unknown Status Code'.
    """
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    };
    return reasons.get(code, 'Unknown Status Code');
543 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
544 if isinstance(headers
, dict):
546 if(sys
.version
[0]=="2"):
547 for headkey
, headvalue
in headers
.iteritems():
548 returnval
.append((headkey
, headvalue
));
549 if(sys
.version
[0]>="3"):
550 for headkey
, headvalue
in headers
.items():
551 returnval
.append((headkey
, headvalue
));
552 elif isinstance(headers
, list):
558 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
559 if isinstance(headers
, dict):
561 if(sys
.version
[0]=="2"):
562 for headkey
, headvalue
in headers
.iteritems():
563 returnval
.append(headkey
+": "+headvalue
);
564 if(sys
.version
[0]>="3"):
565 for headkey
, headvalue
in headers
.items():
566 returnval
.append(headkey
+": "+headvalue
);
567 elif isinstance(headers
, list):
def make_http_headers_from_pycurl_to_dict(headers):
    """Parse a raw pycurl response-header blob into a Title-Cased dict.

    Lines without a ': ' separator (e.g. the HTTP status line) are skipped.
    NOTE(review): the init, unpack, and return lines were missing from the
    extracted source and have been reconstructed around the visible loop.
    """
    header_dict = {};
    headers = headers.strip().split('\r\n');
    for header in headers:
        parts = header.split(': ', 1)
        if(len(parts) == 2):
            key, value = parts;
            header_dict[key.title()] = value;
    return header_dict;
583 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
584 if isinstance(headers
, list):
589 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
591 elif isinstance(headers
, dict):
597 def get_httplib_support(checkvalue
=None):
598 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
600 returnval
.append("ftp");
601 returnval
.append("httplib");
603 returnval
.append("httplib2");
604 returnval
.append("urllib");
606 returnval
.append("urllib3");
607 returnval
.append("request3");
608 returnval
.append("request");
610 returnval
.append("requests");
612 returnval
.append("aiohttp");
614 returnval
.append("httpx");
615 returnval
.append("httpx2");
617 returnval
.append("mechanize");
619 returnval
.append("pycurl");
620 if(hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
621 returnval
.append("pycurl2");
622 if(hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
623 returnval
.append("pycurl3");
625 returnval
.append("sftp");
627 returnval
.append("pysftp");
628 if(not checkvalue
is None):
629 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
630 checkvalue
= "urllib";
631 if(checkvalue
=="httplib1"):
632 checkvalue
= "httplib";
633 if(checkvalue
in returnval
):
639 def check_httplib_support(checkvalue
="urllib"):
640 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
641 checkvalue
= "urllib";
642 if(checkvalue
=="httplib1"):
643 checkvalue
= "httplib";
644 returnval
= get_httplib_support(checkvalue
);
647 def get_httplib_support_list():
648 returnval
= get_httplib_support(None);
651 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
652 global geturls_download_sleep
, havezstd
, havebrotli
, haveaiohttp
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
654 sleep
= geturls_download_sleep
;
657 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
658 httplibuse
= "urllib";
659 if(httplibuse
=="httplib1"):
660 httplibuse
= "httplib";
661 if(not haverequests
and httplibuse
=="requests"):
662 httplibuse
= "urllib";
663 if(not haveaiohttp
and httplibuse
=="aiohttp"):
664 httplibuse
= "urllib";
665 if(not havehttpx
and httplibuse
=="httpx"):
666 httplibuse
= "urllib";
667 if(not havehttpx
and httplibuse
=="httpx2"):
668 httplibuse
= "urllib";
669 if(not havehttpcore
and httplibuse
=="httpcore"):
670 httplibuse
= "urllib";
671 if(not havehttpcore
and httplibuse
=="httpcore2"):
672 httplibuse
= "urllib";
673 if(not havemechanize
and httplibuse
=="mechanize"):
674 httplibuse
= "urllib";
675 if(not havepycurl
and httplibuse
=="pycurl"):
676 httplibuse
= "urllib";
677 if(not havepycurl
and httplibuse
=="pycurl2"):
678 httplibuse
= "urllib";
679 if(havepycurl
and httplibuse
=="pycurl2" and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
680 httplibuse
= "pycurl";
681 if(not havepycurl
and httplibuse
=="pycurl3"):
682 httplibuse
= "urllib";
683 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
684 httplibuse
= "pycurl2";
685 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
686 httplibuse
= "pycurl";
687 if(not havehttplib2
and httplibuse
=="httplib2"):
688 httplibuse
= "httplib";
689 if(not haveparamiko
and httplibuse
=="sftp"):
691 if(not havepysftp
and httplibuse
=="pysftp"):
693 if(httplibuse
=="urllib" or httplibuse
=="request"):
694 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
695 elif(httplibuse
=="request"):
696 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
697 elif(httplibuse
=="request3"):
698 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
699 elif(httplibuse
=="httplib"):
700 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
701 elif(httplibuse
=="httplib2"):
702 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
703 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
704 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
705 elif(httplibuse
=="requests"):
706 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
707 elif(httplibuse
=="aiohttp"):
708 returnval
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
709 elif(httplibuse
=="httpx"):
710 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
711 elif(httplibuse
=="httpx2"):
712 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
713 elif(httplibuse
=="httpcore"):
714 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
715 elif(httplibuse
=="httpcore2"):
716 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
717 elif(httplibuse
=="mechanize"):
718 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
719 elif(httplibuse
=="pycurl"):
720 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
721 elif(httplibuse
=="pycurl2"):
722 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
723 elif(httplibuse
=="pycurl3"):
724 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
725 elif(httplibuse
=="ftp"):
726 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
727 elif(httplibuse
=="sftp"):
728 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
729 elif(httplibuse
=="pysftp"):
730 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a temporary file via the backend named by httplibuse.

    Normalizes backend aliases, falls back to a stdlib backend when the
    requested third-party library is not installed, then dispatches to the
    matching download_from_url_file_with_* helper.  Returns that helper's
    result dict, or False when no usable backend exists.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # Alias normalization: several names select the urllib/httplib backends.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to a stdlib backend when the requested library is unavailable.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2")):
        httplibuse = "urllib"
    if(not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2")):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3")):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/3 and HTTP/2 requests to what libcurl supports.
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
        httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl"
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    # BUG FIX: this availability check previously tested haveparamiko; it now
    # tests havepysftp, matching download_from_url_to_file.
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # All backend helpers share the same argument list; build it once.
    # (The old elif chain also had unreachable duplicate "request" and
    # "request3" branches, removed here.)
    dlargs = (httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    if(httplibuse == "urllib"):
        returnval = download_from_url_file_with_urllib(*dlargs)
    elif(httplibuse == "request3"):
        returnval = download_from_url_file_with_request3(*dlargs)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_file_with_httplib(*dlargs)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_file_with_httplib2(*dlargs)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_file_with_urllib3(*dlargs)
    elif(httplibuse == "requests"):
        returnval = download_from_url_file_with_requests(*dlargs)
    elif(httplibuse == "aiohttp"):
        returnval = download_from_url_file_with_aiohttp(*dlargs)
    elif(httplibuse == "httpx"):
        returnval = download_from_url_file_with_httpx(*dlargs)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_file_with_httpx2(*dlargs)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_file_with_httpcore(*dlargs)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_file_with_httpcore2(*dlargs)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_file_with_mechanize(*dlargs)
    elif(httplibuse == "pycurl"):
        returnval = download_from_url_file_with_pycurl(*dlargs)
    elif(httplibuse == "pycurl2"):
        returnval = download_from_url_file_with_pycurl2(*dlargs)
    elif(httplibuse == "pycurl3"):
        returnval = download_from_url_file_with_pycurl3(*dlargs)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_file_with_ftp(*dlargs)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_file_with_sftp(*dlargs)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_file_with_pysftp(*dlargs)
    else:
        returnval = False
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile (or to memory when outfile is "-")
    via the backend named by httplibuse.

    Normalizes backend aliases, falls back to a stdlib backend when the
    requested third-party library is not installed, then dispatches to the
    matching download_from_url_to_file_with_* helper.  Returns that helper's
    result dict, or False when no usable backend exists.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # Alias normalization: several names select the urllib/httplib backends.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to a stdlib backend when the requested library is unavailable.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2")):
        httplibuse = "urllib"
    if(not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2")):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3")):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/3 and HTTP/2 requests to what libcurl supports.
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
        httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl"
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # All backend helpers share the same argument list; build it once.
    # BUG FIX: the old httpx/httpx2/httpcore/httpcore2 branches omitted the
    # outfile/outpath arguments (passing ranges where outfile was expected),
    # unlike every other *_to_file_with_* branch; all backends now receive the
    # full, consistently ordered argument tuple.  The unreachable duplicate
    # "request"/"request3" branches were removed.
    dlargs = (httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    if(httplibuse == "urllib"):
        returnval = download_from_url_to_file_with_urllib(*dlargs)
    elif(httplibuse == "request3"):
        returnval = download_from_url_to_file_with_request3(*dlargs)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_to_file_with_httplib(*dlargs)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_to_file_with_httplib2(*dlargs)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_to_file_with_urllib3(*dlargs)
    elif(httplibuse == "requests"):
        returnval = download_from_url_to_file_with_requests(*dlargs)
    elif(httplibuse == "aiohttp"):
        returnval = download_from_url_to_file_with_aiohttp(*dlargs)
    elif(httplibuse == "httpx"):
        returnval = download_from_url_to_file_with_httpx(*dlargs)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_to_file_with_httpx2(*dlargs)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_to_file_with_httpcore(*dlargs)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(*dlargs)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_to_file_with_mechanize(*dlargs)
    elif(httplibuse == "pycurl"):
        returnval = download_from_url_to_file_with_pycurl(*dlargs)
    elif(httplibuse == "pycurl2"):
        returnval = download_from_url_to_file_with_pycurl2(*dlargs)
    elif(httplibuse == "pycurl3"):
        returnval = download_from_url_to_file_with_pycurl3(*dlargs)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_to_file_with_ftp(*dlargs)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_to_file_with_sftp(*dlargs)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_to_file_with_pysftp(*dlargs)
    else:
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib and return a result dict, or False on error.

    The dict carries the (decompressed) body under 'Content' plus response
    metadata ('Headers', 'Code', 'Reason', 'URL', 'Version', 'Method', ...),
    mirroring the other download_from_url_with_* backends.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # BUG FIX: the fallback branches previously called .update() on the
    # *string* httpuseragent/httpreferer instead of on the httpheaders dict
    # (an AttributeError at runtime).  A plain dict assignment covers both
    # the "key present" and "key absent" cases.
    if(httpuseragent is not None):
        httpheaders['User-Agent'] = httpuseragent
    if(httpreferer is not None):
        httpheaders['Referer'] = httpreferer
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline HTTP basic-auth credentials taken from the URL itself.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl)
        if(httpmethod == "POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata)
        else:
            # GET and any unrecognized method fall through to a plain open().
            geturls_text = geturls_opener.open(geturls_request)
    except HTTPError as geturls_text_error:
        # An HTTPError is itself a usable response object; keep it so the
        # caller still gets status/headers/body for 4xx/5xx responses.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    try:
        httpcodereason = geturls_text.reason
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode())
    try:
        httpversionout = geturls_text.version
    except AttributeError:
        httpversionout = "1.1"
    httpmethodout = geturls_request.get_method()
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if(sys.version[0] == "2"):
        # Python 2 returns a mimetools.Message-like object; copy its headers
        # into a plain dict so later .get() calls behave consistently.
        try:
            httpheaderout = dict((hkey, httpheaderout[hkey]) for hkey in httpheaderout.keys())
        except AttributeError:
            pass
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Best-effort transparent decompression based on Content-Encoding; a
    # failed decompress leaves the raw bytes in place.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        # BUG FIX: previously caught zstandard.error here, which never matches
        # lzma failures (and raises NameError when zstandard is absent).
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        # BUG FIX: previously caught zstandard.error; bz2.decompress raises
        # OSError/ValueError on bad input.
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via urllib into a uniquely named temporary file.

    Returns a dict describing the temp file ('Filename', 'Filesize', timing
    and response metadata), or False when the underlying fetch failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file;
            # fall back to manual parsing when parsedate_to_datetime is absent.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: the elapsed time was computed as start - end (always negative);
    # it is now end - start for both the log line and the DownloadTime fields.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via urllib to outpath/outfile, or into memory when
    outfile is "-".

    Returns a result dict ('Type' "File" with 'Filename', or "Content" with
    'Content') plus timing and response metadata, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Write the download to a real file under outpath.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Mirror the server's Last-Modified timestamp onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (always negative); now end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the dict literal previously had two 'Method' keys
        # ('Method': pretmpfilename.get('Method') then 'Method': httpmethod);
        # only the last one took effect, so the dead duplicate is removed.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    else:
        # outfile "-": return the downloaded bytes in memory.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        # BUG FIX (robustness): previously a failed fetch fell through to
        # .get() on False, raising AttributeError instead of returning False.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (always negative); now end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: same duplicate-'Method' dict key removed as above.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with http.client (httplib) and return a result dict, or
    False on error.

    The dict carries the (decompressed) body under 'Content' plus response
    metadata, mirroring the other download_from_url_with_* backends.
    """
    # NOTE: the old global statement listed havezstd/havebrotli twice; deduped.
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # BUG FIX: the fallback branches previously called .update() on the
    # *string* httpuseragent/httpreferer instead of on the httpheaders dict;
    # a plain assignment covers both the present and absent key cases.
    if(httpuseragent is not None):
        httpheaders['User-Agent'] = httpuseragent
    if(httpreferer is not None):
        httpheaders['Referer'] = httpreferer
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline HTTP basic-auth credentials taken from the URL itself.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            # BUG FIX: the POST branch previously sent the request with the
            # "GET" verb; it now sends "POST" along with the body.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            # GET and any unrecognized method fall back to a plain GET.
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: HTTPResponse.version is the int 10 or 11; the old comparison
    # against the string "10" never matched, so HTTP/1.0 was reported as 1.1.
    if(geturls_text.version == 10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: copy lazily exposed headers into a plain dict so later
        # .get() calls behave consistently.
        try:
            httpheaderout = dict((hkey, httpheaderout[hkey]) for hkey in httpheaderout.keys())
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Best-effort transparent decompression based on Content-Encoding; a
    # failed decompress leaves the raw bytes in place.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        # BUG FIX: previously caught zstandard.error, which never matches lzma
        # failures (and raises NameError when zstandard is absent).
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        # BUG FIX: previously caught zstandard.error; bz2.decompress raises
        # OSError/ValueError on bad input.
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"}
    geturls_text.close()
    return returnval
1286 def download_from_url_file_with_httplib(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
1287 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
1288 exec_time_start
= time
.time();
1289 myhash
= hashlib
.new("sha1");
1290 if(sys
.version
[0]=="2"):
1291 myhash
.update(httpurl
);
1292 myhash
.update(str(buffersize
));
1293 myhash
.update(str(exec_time_start
));
1294 if(sys
.version
[0]>="3"):
1295 myhash
.update(httpurl
.encode('utf-8'));
1296 myhash
.update(str(buffersize
).encode('utf-8'));
1297 myhash
.update(str(exec_time_start
).encode('utf-8'));
1298 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
1300 sleep
= geturls_download_sleep
;
1303 pretmpfilename
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
1304 if(not pretmpfilename
):
1306 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
1307 tmpfilename
= f
.name
;
1309 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1310 except AttributeError:
1312 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1317 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1318 f
.write(pretmpfilename
.get('Content'));
1320 exec_time_end
= time
.time();
1321 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
1322 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
1325 def download_from_url_to_file_with_httplib(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
1326 global geturls_download_sleep
, havezstd
, havebrotli
;
1328 sleep
= geturls_download_sleep
;
1331 if(not outfile
=="-"):
1332 outpath
= outpath
.rstrip(os
.path
.sep
);
1333 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
1334 if(not os
.path
.exists(outpath
)):
1335 os
.makedirs(outpath
);
1336 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
1338 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
1340 pretmpfilename
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
1341 if(not pretmpfilename
):
1343 tmpfilename
= pretmpfilename
.get('Filename');
1344 downloadsize
= int(os
.path
.getsize(tmpfilename
));
1346 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
1347 exec_time_start
= time
.time();
1348 shutil
.move(tmpfilename
, filepath
);
1350 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1351 except AttributeError:
1353 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1358 exec_time_end
= time
.time();
1359 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
1360 if(os
.path
.exists(tmpfilename
)):
1361 os
.remove(tmpfilename
);
1362 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1364 pretmpfilename
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
1365 tmpfilename
= pretmpfilename
.get('Filename');
1366 downloadsize
= int(os
.path
.getsize(tmpfilename
));
1369 exec_time_start
= time
.time();
1370 with
open(tmpfilename
, 'rb') as ft
:
1373 databytes
= ft
.read(buffersize
[1]);
1374 if not databytes
: break;
1375 datasize
= len(databytes
);
1376 fulldatasize
= datasize
+ fulldatasize
;
1379 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1380 downloaddiff
= fulldatasize
- prevdownsize
;
1381 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1382 prevdownsize
= fulldatasize
;
1385 fdata
= f
.getvalue();
1388 os
.remove(tmpfilename
);
1389 exec_time_end
= time
.time();
1390 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1391 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1395 def download_from_url_with_httplib2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
1396 global geturls_download_sleep
, havezstd
, havebrotli
;
1398 sleep
= geturls_download_sleep
;
1401 urlparts
= urlparse
.urlparse(httpurl
);
1402 if(isinstance(httpheaders
, list)):
1403 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
1404 httpheaders
= fix_header_names(httpheaders
);
1405 if(httpuseragent
is not None):
1406 if('User-Agent' in httpheaders
):
1407 httpheaders
['User-Agent'] = httpuseragent
;
1409 httpuseragent
.update({'User-Agent': httpuseragent
});
1410 if(httpreferer
is not None):
1411 if('Referer' in httpheaders
):
1412 httpheaders
['Referer'] = httpreferer
;
1414 httpuseragent
.update({'Referer': httpreferer
});
1415 if(urlparts
.username
is not None or urlparts
.password
is not None):
1416 if(sys
.version
[0]=="2"):
1417 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
1418 if(sys
.version
[0]>="3"):
1419 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
1420 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
1421 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
1422 geturls_opener
.addheaders
= httpheaders
;
1424 if(urlparts
[0]=="http"):
1425 httpconn
= HTTPConnectionWithTimeout(urlparts
[1], timeout
=timeout
);
1426 elif(urlparts
[0]=="https"):
1427 httpconn
= HTTPSConnectionWithTimeout(urlparts
[1], timeout
=timeout
);
1430 if(postdata
is not None and not isinstance(postdata
, dict)):
1431 postdata
= urlencode(postdata
);
1433 if(httpmethod
=="GET"):
1434 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1435 elif(httpmethod
=="POST"):
1436 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
1438 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1439 except socket
.timeout
:
1440 log
.info("Error With URL "+httpurl
);
1442 except socket
.gaierror
:
1443 log
.info("Error With URL "+httpurl
);
1445 except BlockingIOError
:
1446 log
.info("Error With URL "+httpurl
);
1448 geturls_text
= httpconn
.getresponse();
1449 httpcodeout
= geturls_text
.status
;
1450 httpcodereason
= geturls_text
.reason
;
1451 if(geturls_text
.version
=="10"):
1452 httpversionout
= "1.0";
1454 httpversionout
= "1.1";
1455 httpmethodout
= httpmethod
;
1456 httpurlout
= httpurl
;
1457 httpheaderout
= geturls_text
.getheaders();
1458 httpheadersentout
= httpheaders
;
1459 if(isinstance(httpheaderout
, list)):
1460 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
1461 if(sys
.version
[0]=="2"):
1463 prehttpheaderout
= httpheaderout
;
1464 httpheaderkeys
= httpheaderout
.keys();
1465 imax
= len(httpheaderkeys
);
1469 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
1471 except AttributeError:
1473 httpheaderout
= fix_header_names(httpheaderout
);
1474 if(isinstance(httpheadersentout
, list)):
1475 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
1476 httpheadersentout
= fix_header_names(httpheadersentout
);
1477 downloadsize
= httpheaderout
.get('Content-Length');
1478 if(downloadsize
is not None):
1479 downloadsize
= int(downloadsize
);
1480 if downloadsize
is None: downloadsize
= 0;
1483 log
.info("Downloading URL "+httpurl
);
1484 with
BytesIO() as strbuf
:
1486 databytes
= geturls_text
.read(buffersize
);
1487 if not databytes
: break;
1488 datasize
= len(databytes
);
1489 fulldatasize
= datasize
+ fulldatasize
;
1492 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1493 downloaddiff
= fulldatasize
- prevdownsize
;
1494 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1495 prevdownsize
= fulldatasize
;
1496 strbuf
.write(databytes
);
1498 returnval_content
= strbuf
.read();
1499 if(httpheaderout
.get("Content-Encoding")=="gzip"):
1501 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
1504 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
1506 returnval_content
= zlib
.decompress(returnval_content
);
1509 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
1511 returnval_content
= brotli
.decompress(returnval_content
);
1512 except brotli
.error
:
1514 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
1516 returnval_content
= zstandard
.decompress(returnval_content
);
1517 except zstandard
.error
:
1519 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
1521 returnval_content
= lzma
.decompress(returnval_content
);
1522 except zstandard
.error
:
1524 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
1526 returnval_content
= bz2
.decompress(returnval_content
);
1527 except zstandard
.error
:
1529 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "httplib2"};
1530 geturls_text
.close();
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when the httplib2 package is unavailable.

        Delegates straight to the urllib implementation with the same
        arguments, so callers get an identical result dict regardless of
        which HTTP backend is installed.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
1539 def download_from_url_file_with_httplib2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
1540 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
1541 exec_time_start
= time
.time();
1542 myhash
= hashlib
.new("sha1");
1543 if(sys
.version
[0]=="2"):
1544 myhash
.update(httpurl
);
1545 myhash
.update(str(buffersize
));
1546 myhash
.update(str(exec_time_start
));
1547 if(sys
.version
[0]>="3"):
1548 myhash
.update(httpurl
.encode('utf-8'));
1549 myhash
.update(str(buffersize
).encode('utf-8'));
1550 myhash
.update(str(exec_time_start
).encode('utf-8'));
1551 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
1553 sleep
= geturls_download_sleep
;
1556 pretmpfilename
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
1557 if(not pretmpfilename
):
1559 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
1560 tmpfilename
= f
.name
;
1562 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1563 except AttributeError:
1565 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1570 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1571 f
.write(pretmpfilename
.get('Content'));
1573 exec_time_end
= time
.time();
1574 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
1575 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when the httplib2 package is unavailable.

        Forwards every argument, including the byte-range pair, to the
        urllib file-download implementation so the returned temp-file
        dict has the same shape either way.
        """
        # NOTE: ranges keeps its original shared mutable default ([None, None])
        # to preserve the module-wide interface convention.
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
1584 def download_from_url_to_file_with_httplib2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
1585 global geturls_download_sleep
, havezstd
, havebrotli
;
1587 sleep
= geturls_download_sleep
;
1590 if(not outfile
=="-"):
1591 outpath
= outpath
.rstrip(os
.path
.sep
);
1592 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
1593 if(not os
.path
.exists(outpath
)):
1594 os
.makedirs(outpath
);
1595 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
1597 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
1599 pretmpfilename
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
1600 if(not pretmpfilename
):
1602 tmpfilename
= pretmpfilename
.get('Filename');
1603 downloadsize
= int(os
.path
.getsize(tmpfilename
));
1605 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
1606 exec_time_start
= time
.time();
1607 shutil
.move(tmpfilename
, filepath
);
1609 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1610 except AttributeError:
1612 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1617 exec_time_end
= time
.time();
1618 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
1619 if(os
.path
.exists(tmpfilename
)):
1620 os
.remove(tmpfilename
);
1621 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
1623 pretmpfilename
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
1624 tmpfilename
= pretmpfilename
.get('Filename');
1625 downloadsize
= int(os
.path
.getsize(tmpfilename
));
1628 exec_time_start
= time
.time();
1629 with
open(tmpfilename
, 'rb') as ft
:
1632 databytes
= ft
.read(buffersize
[1]);
1633 if not databytes
: break;
1634 datasize
= len(databytes
);
1635 fulldatasize
= datasize
+ fulldatasize
;
1638 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1639 downloaddiff
= fulldatasize
- prevdownsize
;
1640 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1641 prevdownsize
= fulldatasize
;
1644 fdata
= f
.getvalue();
1647 os
.remove(tmpfilename
);
1648 exec_time_end
= time
.time();
1649 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1650 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when the httplib2 package is unavailable.

        Delegates to download_from_url_to_file_with_urllib.  The
        ``*_to_file_*`` family's signature is
        (..., postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
        the original passed ``buffersize`` positionally where ``outfile``
        was expected (shifting every later argument) and dropped
        ``ranges`` entirely.  Keyword arguments are used here so each
        value reaches the parameter it is meant for.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download a URL into memory via the "request" backend name.

    Thin alias: hands every argument unchanged to the urllib
    implementation and returns its result dict.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download a URL to a temp file via the "request" backend name.

    Thin alias: forwards all arguments, including the byte-range pair,
    to the urllib file-download implementation.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download a URL to outpath/outfile via the "request" backend name.

    Delegates to download_from_url_to_file_with_urllib.  That family's
    signature is (..., postdata, outfile, outpath, ranges, buffersize,
    sleep, timeout) — see the sibling definitions in this file — but the
    original call passed ``buffersize`` positionally into the ``outfile``
    slot, shifting every subsequent argument and dropping ``ranges``.
    Keyword arguments are used so each value binds to the intended
    parameter.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
1671 def download_from_url_with_requests(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
1672 global geturls_download_sleep
, havezstd
, havebrotli
;
1674 sleep
= geturls_download_sleep
;
1677 urlparts
= urlparse
.urlparse(httpurl
);
1678 if(isinstance(httpheaders
, list)):
1679 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
1680 httpheaders
= fix_header_names(httpheaders
);
1681 if(httpuseragent
is not None):
1682 if('User-Agent' in httpheaders
):
1683 httpheaders
['User-Agent'] = httpuseragent
;
1685 httpuseragent
.update({'User-Agent': httpuseragent
});
1686 if(httpreferer
is not None):
1687 if('Referer' in httpheaders
):
1688 httpheaders
['Referer'] = httpreferer
;
1690 httpuseragent
.update({'Referer': httpreferer
});
1691 if(urlparts
.username
is not None or urlparts
.password
is not None):
1692 if(sys
.version
[0]=="2"):
1693 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
1694 if(sys
.version
[0]>="3"):
1695 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
1696 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
1698 if(postdata
is not None and not isinstance(postdata
, dict)):
1699 postdata
= urlencode(postdata
);
1701 reqsession
= requests
.Session();
1702 if(httpmethod
=="GET"):
1703 geturls_text
= reqsession
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
, stream
=True);
1704 elif(httpmethod
=="POST"):
1705 geturls_text
= reqsession
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
, stream
=True);
1707 geturls_text
= reqsession
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
, stream
=True);
1708 except requests
.exceptions
.ConnectTimeout
:
1709 log
.info("Error With URL "+httpurl
);
1711 except requests
.exceptions
.ConnectError
:
1712 log
.info("Error With URL "+httpurl
);
1714 except socket
.timeout
:
1715 log
.info("Error With URL "+httpurl
);
1717 httpcodeout
= geturls_text
.status_code
;
1718 httpcodereason
= geturls_text
.reason
;
1719 if(geturls_text
.raw
.version
=="10"):
1720 httpversionout
= "1.0";
1722 httpversionout
= "1.1";
1723 httpmethodout
= httpmethod
;
1724 httpurlout
= geturls_text
.url
;
1725 httpheaderout
= geturls_text
.headers
;
1726 httpheadersentout
= geturls_text
.request
.headers
;
1727 if(isinstance(httpheaderout
, list)):
1728 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
1729 if(sys
.version
[0]=="2"):
1731 prehttpheaderout
= httpheaderout
;
1732 httpheaderkeys
= httpheaderout
.keys();
1733 imax
= len(httpheaderkeys
);
1737 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
1739 except AttributeError:
1741 httpheaderout
= fix_header_names(httpheaderout
);
1742 if(isinstance(httpheadersentout
, list)):
1743 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
1744 httpheadersentout
= fix_header_names(httpheadersentout
);
1745 downloadsize
= httpheaderout
.get('Content-Length');
1746 if(downloadsize
is not None):
1747 downloadsize
= int(downloadsize
);
1748 if downloadsize
is None: downloadsize
= 0;
1751 log
.info("Downloading URL "+httpurl
);
1752 with
BytesIO() as strbuf
:
1754 databytes
= geturls_text
.raw
.read(buffersize
);
1755 if not databytes
: break;
1756 datasize
= len(databytes
);
1757 fulldatasize
= datasize
+ fulldatasize
;
1760 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1761 downloaddiff
= fulldatasize
- prevdownsize
;
1762 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1763 prevdownsize
= fulldatasize
;
1764 strbuf
.write(databytes
);
1766 returnval_content
= strbuf
.read();
1767 if(httpheaderout
.get("Content-Encoding")=="gzip"):
1769 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
1772 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
1774 returnval_content
= zlib
.decompress(returnval_content
);
1777 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
1779 returnval_content
= brotli
.decompress(returnval_content
);
1780 except brotli
.error
:
1782 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
1784 returnval_content
= zstandard
.decompress(returnval_content
);
1785 except zstandard
.error
:
1787 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
1789 returnval_content
= lzma
.decompress(returnval_content
);
1790 except zstandard
.error
:
1792 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
1794 returnval_content
= bz2
.decompress(returnval_content
);
1795 except zstandard
.error
:
1797 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "requests"};
1798 geturls_text
.close();
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when the requests package is unavailable.

        Delegates straight to the urllib implementation with identical
        arguments, so the caller-visible result dict is unchanged.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via requests into a uniquely-named temporary file.

        Returns a dict describing the file ('Type': "File", 'Filename', sizes,
        headers, timing) or False when the underlying download failed.
        NOTE(review): defaults restored to match the non-requests fallback
        definition so callers relying on them work either way.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash of URL + buffer size + start time gives a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Preserve the server's Last-Modified stamp on the temp file; fall
            # back to manual strptime when parsedate_to_datetime is missing.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Report the on-disk size, which is authoritative after the write.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when requests is unavailable: delegate the to-temp-file
        download to the urllib implementation."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Bugfix: the delegated result was computed but never returned.
        return returnval;
if(haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via requests to outpath/outfile, or — when outfile
        is "-" — return the downloaded bytes in-memory.

        Returns a result dict ('Type': "File" or "Content") or False when the
        destination is invalid or the download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            # Refuse impossible destinations: outpath is a file, or the target
            # path is an existing directory.
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Carry the server's Last-Modified time over to the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # Bugfix: the dict literal carried a duplicate 'Method' key; only
            # the later value (httpmethod) ever survived, so keep just that.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1]-byte chunks,
            # logging progress as we go.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # Bugfix: duplicate 'Method' key removed here as well (see above).
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when requests is unavailable: delegate to the urllib
        to-file implementation (note its positional argument order)."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Bugfix: the delegated result was computed but never returned.
        return returnval;
if(haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with aiohttp and return a response dict
        ('Type': "Content", 'Content', 'Headers', 'Code', ...), decompressing
        the body per Content-Encoding; returns False on connection errors."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # Bugfix: original called httpuseragent.update(...) — updating
                # the argument string instead of the headers dict.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # Bugfix: same as above — must update httpheaders.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline URL credentials become an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize);
            if(httpmethod=="GET"):
                geturls_text = reqsession.get(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = reqsession.post(httpurl, data=postdata);
            else:
                geturls_text = reqsession.get(httpurl);
        # NOTE(review): aiohttp exposes connection errors under aiohttp.* not
        # aiohttp.exceptions.*; kept as in the original — confirm upstream.
        except aiohttp.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except aiohttp.exceptions.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        httpversionout = geturls_text.version;
        httpmethodout = geturls_text.method;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request_info.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2: flatten the header object into a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Transparently decode the body based on Content-Encoding; failures
        # leave the raw bytes untouched.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # Bugfix: original caught zstandard.error on the lzma path.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # Bugfix: original caught zstandard.error on the bzip2 path.
            except Exception:
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"};
        geturls_text.close();
        return returnval;
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when aiohttp is unavailable: delegate to the urllib
        implementation with identical arguments and result shape."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Bugfix: the delegated result was computed but never returned.
        return returnval;
if(haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via aiohttp into a uniquely-named temporary file.

        Returns a dict describing the file ('Type': "File", 'Filename', sizes,
        headers, timing) or False when the underlying download failed.
        NOTE(review): defaults restored to match the non-aiohttp fallback
        definition so callers relying on them work either way.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash of URL + buffer size + start time gives a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Preserve the server's Last-Modified stamp on the temp file; fall
            # back to manual strptime when parsedate_to_datetime is missing.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Report the on-disk size, which is authoritative after the write.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when aiohttp is unavailable: delegate the to-temp-file
        download to the urllib implementation."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Bugfix: the delegated result was computed but never returned.
        return returnval;
if(haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via aiohttp to outpath/outfile, or — when outfile
        is "-" — return the downloaded bytes in-memory.

        Returns a result dict ('Type': "File" or "Content") or False when the
        destination is invalid or the download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            # Refuse impossible destinations: outpath is a file, or the target
            # path is an existing directory.
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False;
            pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Carry the server's Last-Modified time over to the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # Bugfix: the dict literal carried a duplicate 'Method' key; only
            # the later value (httpmethod) ever survived, so keep just that.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            # Copy the temp file into memory in buffersize[1]-byte chunks,
            # logging progress as we go.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # Bugfix: duplicate 'Method' key removed here as well (see above).
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when aiohttp is unavailable: delegate to the urllib
        to-file implementation (note its positional argument order)."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Bugfix: the delegated result was computed but never returned.
        return returnval;
if(havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl with httpx and return a response dict
        ('Type': "Content", 'Content', 'Headers', 'Code', ...), decompressing
        the body per Content-Encoding; returns False on connection errors."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # Bugfix: original called httpuseragent.update(...) — updating
                # the argument string instead of the headers dict.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # Bugfix: same as above — must update httpheaders.
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline URL credentials become an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        # Older httpx versions lack reason_phrase; derive it from the code.
        try:
            httpcodereason = geturls_text.reason_phrase;
        except AttributeError:
            httpcodereason = http_status_to_reason(geturls_text.status_code);
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
        if(sys.version[0]=="2"):
            # Python 2: flatten the header object into a plain dict.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = geturls_text.read();
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        geturls_text.close();
        # Transparently decode the body based on Content-Encoding; failures
        # leave the raw bytes untouched.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            # Bugfix: original caught zstandard.error on the lzma path.
            except lzma.LZMAError:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            # Bugfix: original caught zstandard.error on the bzip2 path.
            except Exception:
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"};
        geturls_text.close();
        return returnval;
if(not havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate to the urllib
        implementation with identical arguments and result shape."""
        # Bugfix: the guard was missing, so this fallback unconditionally
        # replaced the real httpx implementation; it also never returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval;
if(havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via httpx into a uniquely-named temporary file.

        Returns a dict describing the file ('Type': "File", 'Filename', sizes,
        headers, timing) or False when the underlying download failed.
        NOTE(review): defaults restored to match the non-httpx fallback
        definition so callers relying on them work either way.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # Hash of URL + buffer size + start time gives a unique temp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Preserve the server's Last-Modified stamp on the temp file; fall
            # back to manual strptime when parsedate_to_datetime is missing.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        # Report the on-disk size, which is authoritative after the write.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpx is unavailable: delegate the to-temp-file
        download to the urllib implementation."""
        # Bugfix: the guard was missing, so this fallback unconditionally
        # replaced the real httpx implementation; it also never returned.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download ``httpurl`` with the httpx backend.

    When ``outfile`` is a real name, the temporary download is moved to
    ``outpath/outfile`` and a ``Type: "File"`` result dict is returned.
    When ``outfile`` is "-", the content is returned in memory as a
    ``Type: "Content"`` result dict.  Returns False when the destination is
    unusable or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Writing to a real file: validate the destination first.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name is already taken by a directory
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks email.utils.parsedate_to_datetime; parse by hand.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # header missing or unparsable: keep the move mtime
        except ValueError:
            pass
        exec_time_end = time.time()
        # Bug fix: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # Bug fix: the dict previously listed 'Method' twice; the first entry
        # (pretmpfilename.get('Method')) was silently overridden by httpmethod,
        # so only the effective entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if outfile == "-":
        # Copy the temporary download into memory and return the raw bytes.
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if downloadsize > 0:
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation and return its result dict.
    """
    # Bug fixes: (1) the result was previously discarded (no return);
    # (2) arguments after postdata were passed positionally in the wrong
    # order (buffersize landed in the outfile slot and ranges was dropped).
    # Keyword arguments make the mapping explicit and order-proof.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch ``httpurl`` with an HTTP/1.1+HTTP/2 httpx client and return a
    ``Type: "Content"`` result dict (decoded body, headers, status, timing
    metadata), or False on a connection error.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Bug fix: the else-branch updated the httpuseragent string
            # instead of the header dict, so the UA was never sent.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # Bug fix: same as above for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Credentials embedded in the URL become a Basic auth header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif httpmethod == "POST":
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # Unknown method: fall back to GET, matching the other backends.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        # Older httpx versions have no reason_phrase; map the status code.
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # On Python 2 the header object may not behave like a dict; copy it.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
            # httpx Response.read() returns the entire cached body, so a
            # second iteration would loop forever on the same bytes.
            break
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode compressed bodies the server did not decode.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # Bug fix: previously caught zstandard.error here, so lzma
            # failures escaped (or NameError'd when zstandard was absent).
            pass
    elif httpheaderout.get("Content-Encoding") == "bzip2":
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # Bug fix: previously caught zstandard.error for bz2 data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize,
                 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")},
                 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout,
                 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout,
                 'Reason': httpcodereason, 'HTTPLib': "httpx2"}
    geturls_text.close()
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation and return its result dict.
    """
    # Bug fix: the delegate's result was previously discarded; return it.
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download ``httpurl`` via the httpx2 backend into a uniquely named
    temporary file and return a ``Type: "File"`` result dict describing it,
    or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        f.write(pretmpfilename.get('Content'))
    # Bug fix: the timestamp is applied AFTER writing/closing the file;
    # previously os.utime ran before f.write, so the write clobbered the
    # Last-Modified mtime that had just been set.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
    except AttributeError:
        # Python 2 lacks email.utils.parsedate_to_datetime; parse by hand.
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
        except ValueError:
            pass  # header missing or unparsable: keep the write mtime
    except ValueError:
        pass
    returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'),
                 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")},
                 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'),
                 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    exec_time_end = time.time()
    # Bug fix: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename),
                      'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")},
                      'DownloadTime': float(exec_time_end - exec_time_start),
                      'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation and return its result dict.
    """
    # Bug fix: the delegate's result was previously discarded; return it.
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download ``httpurl`` with the httpx2 backend.

    When ``outfile`` is a real name, the temporary download is moved to
    ``outpath/outfile`` and a ``Type: "File"`` result dict is returned.
    When ``outfile`` is "-", the content is returned in memory as a
    ``Type: "Content"`` result dict.  Returns False when the destination is
    unusable or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Writing to a real file: validate the destination first.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name is already taken by a directory
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks email.utils.parsedate_to_datetime; parse by hand.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # header missing or unparsable: keep the move mtime
        except ValueError:
            pass
        exec_time_end = time.time()
        # Bug fix: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # Bug fix: 'Method' was listed twice; only the effective entry
        # (httpmethod, which won the duplicate-key race) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if outfile == "-":
        # Copy the temporary download into memory and return the raw bytes.
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if downloadsize > 0:
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation and return its result dict.
    """
    # Bug fixes: (1) the result was previously discarded (no return);
    # (2) arguments after postdata were passed positionally in the wrong
    # order (buffersize landed in the outfile slot and ranges was dropped).
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch ``httpurl`` with an httpcore HTTP/1.1 connection pool and return
    a ``Type: "Content"`` result dict, or False on a connection error.

    NOTE(review): httpcore does not take cookies directly; httpcookie is
    accepted for interface parity but only headers are sent — confirm
    whether cookie headers should be merged in.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Bug fix: the else-branch updated the httpuseragent string
            # instead of the header dict, so the UA was never sent.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # Bug fix: same as above for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Credentials embedded in the URL become a Basic auth header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif httpmethod == "POST":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            # Bug fix: the POST branch previously issued a "GET" request,
            # so POSTs were silently downgraded.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            # Unknown method: fall back to GET, matching the other backends.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # On Python 2 the header object may not behave like a dict; copy it.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
            # httpcore Response.read() returns the entire body at once, so a
            # second iteration would loop forever on the same bytes.
            break
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode compressed bodies the server did not decode.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # Bug fix: previously caught zstandard.error here.
            pass
    elif httpheaderout.get("Content-Encoding") == "bzip2":
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # Bug fix: previously caught zstandard.error for bz2 data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize,
                 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")},
                 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout,
                 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout,
                 'Reason': httpcodereason, 'HTTPLib': "httpcore"}
    geturls_text.close()
    return returnval
if not havehttpcore:
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib implementation and return its result dict.
        """
        # Bug fix: the delegate's result was previously discarded; return it.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download ``httpurl`` via the httpcore backend into a uniquely named
    temporary file and return a ``Type: "File"`` result dict describing it,
    or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        f.write(pretmpfilename.get('Content'))
    # Bug fix: the timestamp is applied AFTER writing/closing the file;
    # previously os.utime ran before f.write, so the write clobbered the
    # Last-Modified mtime that had just been set.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
    except AttributeError:
        # Python 2 lacks email.utils.parsedate_to_datetime; parse by hand.
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
        except ValueError:
            pass  # header missing or unparsable: keep the write mtime
    except ValueError:
        pass
    returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'),
                 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")},
                 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'),
                 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    exec_time_end = time.time()
    # Bug fix: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename),
                      'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")},
                      'DownloadTime': float(exec_time_end - exec_time_start),
                      'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not havehttpcore:
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib implementation and return its result dict.
        """
        # Bug fix: the delegate's result was previously discarded; return it.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
2882 def download_from_url_to_file_with_httpcore(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
2883 global geturls_download_sleep
, havezstd
, havebrotli
;
2885 sleep
= geturls_download_sleep
;
2888 if(not outfile
=="-"):
2889 outpath
= outpath
.rstrip(os
.path
.sep
);
2890 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
2891 if(not os
.path
.exists(outpath
)):
2892 os
.makedirs(outpath
);
2893 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
2895 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
2897 pretmpfilename
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2898 if(not pretmpfilename
):
2900 tmpfilename
= pretmpfilename
.get('Filename');
2901 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2903 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
2904 exec_time_start
= time
.time();
2905 shutil
.move(tmpfilename
, filepath
);
2907 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
2908 except AttributeError:
2910 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2915 exec_time_end
= time
.time();
2916 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
2917 if(os
.path
.exists(tmpfilename
)):
2918 os
.remove(tmpfilename
);
2919 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
2921 pretmpfilename
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2922 tmpfilename
= pretmpfilename
.get('Filename');
2923 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2926 exec_time_start
= time
.time();
2927 with
open(tmpfilename
, 'rb') as ft
:
2930 databytes
= ft
.read(buffersize
[1]);
2931 if not databytes
: break;
2932 datasize
= len(databytes
);
2933 fulldatasize
= datasize
+ fulldatasize
;
2936 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
2937 downloaddiff
= fulldatasize
- prevdownsize
;
2938 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
2939 prevdownsize
= fulldatasize
;
2942 fdata
= f
.getvalue();
2945 os
.remove(tmpfilename
);
2946 exec_time_end
= time
.time();
2947 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
2948 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed.

        Delegates to the urllib-based implementation and returns its result.
        Note: `ranges` is accepted for signature compatibility but is not
        forwarded (the urllib to-file implementation does not take it here).
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        # NOTE(review): mutable defaults ([None, None], [524288, 524288]) are
        # kept to preserve the module-wide signature convention; they are not
        # mutated here.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
2957 def download_from_url_with_httpcore2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
2958 global geturls_download_sleep
, havezstd
, havebrotli
;
2960 sleep
= geturls_download_sleep
;
2963 urlparts
= urlparse
.urlparse(httpurl
);
2964 if(isinstance(httpheaders
, list)):
2965 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
2966 httpheaders
= fix_header_names(httpheaders
);
2967 if(httpuseragent
is not None):
2968 if('User-Agent' in httpheaders
):
2969 httpheaders
['User-Agent'] = httpuseragent
;
2971 httpuseragent
.update({'User-Agent': httpuseragent
});
2972 if(httpreferer
is not None):
2973 if('Referer' in httpheaders
):
2974 httpheaders
['Referer'] = httpreferer
;
2976 httpuseragent
.update({'Referer': httpreferer
});
2977 if(urlparts
.username
is not None or urlparts
.password
is not None):
2978 if(sys
.version
[0]=="2"):
2979 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
2980 if(sys
.version
[0]>="3"):
2981 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
2982 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
2984 if(postdata
is not None and not isinstance(postdata
, dict)):
2985 postdata
= urlencode(postdata
);
2987 if(httpmethod
=="GET"):
2988 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
2989 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
2990 elif(httpmethod
=="POST"):
2991 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
2992 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
2994 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
2995 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
2996 except httpcore
.ConnectTimeout
:
2997 log
.info("Error With URL "+httpurl
);
2999 except httpcore
.ConnectError
:
3000 log
.info("Error With URL "+httpurl
);
3002 except socket
.timeout
:
3003 log
.info("Error With URL "+httpurl
);
3005 httpcodeout
= geturls_text
.status
;
3006 httpcodereason
= http_status_to_reason(geturls_text
.status
);
3007 httpversionout
= "1.1";
3008 httpmethodout
= httpmethod
;
3009 httpurlout
= str(httpurl
);
3010 httpheaderout
= geturls_text
.headers
;
3011 httpheadersentout
= httpheaders
;
3012 if(isinstance(httpheaderout
, list)):
3013 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
3014 if(sys
.version
[0]=="2"):
3016 prehttpheaderout
= httpheaderout
;
3017 httpheaderkeys
= httpheaderout
.keys();
3018 imax
= len(httpheaderkeys
);
3022 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
3024 except AttributeError:
3026 httpheaderout
= fix_header_names(httpheaderout
);
3027 if(isinstance(httpheadersentout
, list)):
3028 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
3029 httpheadersentout
= fix_header_names(httpheadersentout
);
3030 downloadsize
= httpheaderout
.get('Content-Length');
3031 if(downloadsize
is not None):
3032 downloadsize
= int(downloadsize
);
3033 if downloadsize
is None: downloadsize
= 0;
3036 log
.info("Downloading URL "+httpurl
);
3037 with
BytesIO() as strbuf
:
3039 databytes
= geturls_text
.read();
3040 if not databytes
: break;
3041 datasize
= len(databytes
);
3042 fulldatasize
= datasize
+ fulldatasize
;
3045 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3046 downloaddiff
= fulldatasize
- prevdownsize
;
3047 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3048 prevdownsize
= fulldatasize
;
3049 strbuf
.write(databytes
);
3052 returnval_content
= strbuf
.read();
3053 geturls_text
.close();
3054 if(httpheaderout
.get("Content-Encoding")=="gzip"):
3056 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
3059 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
3061 returnval_content
= zlib
.decompress(returnval_content
);
3064 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
3066 returnval_content
= brotli
.decompress(returnval_content
);
3067 except brotli
.error
:
3069 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
3071 returnval_content
= zstandard
.decompress(returnval_content
);
3072 except zstandard
.error
:
3074 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
3076 returnval_content
= lzma
.decompress(returnval_content
);
3077 except zstandard
.error
:
3079 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
3081 returnval_content
= bz2
.decompress(returnval_content
);
3082 except zstandard
.error
:
3084 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "httpcore2"};
3085 geturls_text
.close();
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed.

        Delegates to the urllib-based downloader and returns its result
        dict (or False on failure, per the delegate's convention).
        Fix: the original bound the delegated result to a local and never
        returned it, so callers always received None.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
3094 def download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
3095 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
3096 exec_time_start
= time
.time();
3097 myhash
= hashlib
.new("sha1");
3098 if(sys
.version
[0]=="2"):
3099 myhash
.update(httpurl
);
3100 myhash
.update(str(buffersize
));
3101 myhash
.update(str(exec_time_start
));
3102 if(sys
.version
[0]>="3"):
3103 myhash
.update(httpurl
.encode('utf-8'));
3104 myhash
.update(str(buffersize
).encode('utf-8'));
3105 myhash
.update(str(exec_time_start
).encode('utf-8'));
3106 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
3108 sleep
= geturls_download_sleep
;
3111 pretmpfilename
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
3112 if(not pretmpfilename
):
3114 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
3115 tmpfilename
= f
.name
;
3117 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3118 except AttributeError:
3120 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3125 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3126 f
.write(pretmpfilename
.get('Content'));
3128 exec_time_end
= time
.time();
3129 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
3130 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed.

        Delegates to the urllib-based file downloader (forwarding `ranges`)
        and returns its result.
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
3139 def download_from_url_to_file_with_httpcore2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3140 global geturls_download_sleep
, havezstd
, havebrotli
;
3142 sleep
= geturls_download_sleep
;
3145 if(not outfile
=="-"):
3146 outpath
= outpath
.rstrip(os
.path
.sep
);
3147 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
3148 if(not os
.path
.exists(outpath
)):
3149 os
.makedirs(outpath
);
3150 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
3152 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
3154 pretmpfilename
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3155 if(not pretmpfilename
):
3157 tmpfilename
= pretmpfilename
.get('Filename');
3158 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3160 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
3161 exec_time_start
= time
.time();
3162 shutil
.move(tmpfilename
, filepath
);
3164 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3165 except AttributeError:
3167 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3172 exec_time_end
= time
.time();
3173 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
3174 if(os
.path
.exists(tmpfilename
)):
3175 os
.remove(tmpfilename
);
3176 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3178 pretmpfilename
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3179 tmpfilename
= pretmpfilename
.get('Filename');
3180 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3183 exec_time_start
= time
.time();
3184 with
open(tmpfilename
, 'rb') as ft
:
3187 databytes
= ft
.read(buffersize
[1]);
3188 if not databytes
: break;
3189 datasize
= len(databytes
);
3190 fulldatasize
= datasize
+ fulldatasize
;
3193 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3194 downloaddiff
= fulldatasize
- prevdownsize
;
3195 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3196 prevdownsize
= fulldatasize
;
3199 fdata
= f
.getvalue();
3202 os
.remove(tmpfilename
);
3203 exec_time_end
= time
.time();
3204 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
3205 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3209 def download_from_url_to_file_with_httpcore2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3210 returnval
= download_from_url_to_file_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, outfile
, outpath
, sleep
, timeout
)
3214 def download_from_url_with_request3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3215 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
)
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to the urllib-based downloader and returns its result.
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
3224 def download_from_url_file_with_request3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
3225 returnval
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
, sleep
, timeout
)
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to the urllib-based file downloader (forwarding `ranges`)
        and returns its result.
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
3234 def download_from_url_to_file_with_request3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3235 returnval
= download_from_url_to_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, outfile
, outpath
, sleep
, timeout
)
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Delegates to the urllib-based to-file implementation and returns its
        result. Note: `ranges` is accepted for signature compatibility but is
        not forwarded, matching the other to-file fallbacks in this module.
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
3244 def download_from_url_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3245 global geturls_download_sleep
, havezstd
, havebrotli
;
3247 sleep
= geturls_download_sleep
;
3250 urlparts
= urlparse
.urlparse(httpurl
);
3251 if(isinstance(httpheaders
, list)):
3252 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
3253 httpheaders
= fix_header_names(httpheaders
);
3254 if(httpuseragent
is not None):
3255 if('User-Agent' in httpheaders
):
3256 httpheaders
['User-Agent'] = httpuseragent
;
3258 httpuseragent
.update({'User-Agent': httpuseragent
});
3259 if(httpreferer
is not None):
3260 if('Referer' in httpheaders
):
3261 httpheaders
['Referer'] = httpreferer
;
3263 httpuseragent
.update({'Referer': httpreferer
});
3264 if(urlparts
.username
is not None or urlparts
.password
is not None):
3265 if(sys
.version
[0]=="2"):
3266 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
3267 if(sys
.version
[0]>="3"):
3268 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
3269 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
3271 timeout
= urllib3
.util
.Timeout(connect
=timeout
, read
=timeout
);
3272 urllib_pool
= urllib3
.PoolManager(headers
=httpheaders
, timeout
=timeout
);
3273 if(postdata
is not None and not isinstance(postdata
, dict)):
3274 postdata
= urlencode(postdata
);
3276 if(httpmethod
=="GET"):
3277 geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
3278 elif(httpmethod
=="POST"):
3279 geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
3281 geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
3282 except urllib3
.exceptions
.ConnectTimeoutError
:
3283 log
.info("Error With URL "+httpurl
);
3285 except urllib3
.exceptions
.ConnectError
:
3286 log
.info("Error With URL "+httpurl
);
3288 except urllib3
.exceptions
.MaxRetryError
:
3289 log
.info("Error With URL "+httpurl
);
3291 except socket
.timeout
:
3292 log
.info("Error With URL "+httpurl
);
3295 log
.info("Error With URL "+httpurl
);
3297 httpcodeout
= geturls_text
.status
;
3298 httpcodereason
= geturls_text
.reason
;
3299 if(geturls_text
.version
=="10"):
3300 httpversionout
= "1.0";
3302 httpversionout
= "1.1";
3303 httpmethodout
= httpmethod
;
3304 httpurlout
= geturls_text
.geturl();
3305 httpheaderout
= geturls_text
.info();
3306 httpheadersentout
= httpheaders
;
3307 if(isinstance(httpheaderout
, list)):
3308 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
3309 if(sys
.version
[0]=="2"):
3311 prehttpheaderout
= httpheaderout
;
3312 httpheaderkeys
= httpheaderout
.keys();
3313 imax
= len(httpheaderkeys
);
3317 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
3319 except AttributeError:
3321 httpheaderout
= fix_header_names(httpheaderout
);
3322 if(isinstance(httpheadersentout
, list)):
3323 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
3324 httpheadersentout
= fix_header_names(httpheadersentout
);
3325 downloadsize
= httpheaderout
.get('Content-Length');
3326 if(downloadsize
is not None):
3327 downloadsize
= int(downloadsize
);
3328 if downloadsize
is None: downloadsize
= 0;
3331 log
.info("Downloading URL "+httpurl
);
3332 with
BytesIO() as strbuf
:
3334 databytes
= geturls_text
.read(buffersize
);
3335 if not databytes
: break;
3336 datasize
= len(databytes
);
3337 fulldatasize
= datasize
+ fulldatasize
;
3340 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3341 downloaddiff
= fulldatasize
- prevdownsize
;
3342 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3343 prevdownsize
= fulldatasize
;
3344 strbuf
.write(databytes
);
3346 returnval_content
= strbuf
.read();
3347 if(httpheaderout
.get("Content-Encoding")=="gzip"):
3349 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
3352 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
3354 returnval_content
= zlib
.decompress(returnval_content
);
3357 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
3359 returnval_content
= brotli
.decompress(returnval_content
);
3360 except brotli
.error
:
3362 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
3364 returnval_content
= zstandard
.decompress(returnval_content
);
3365 except zstandard
.error
:
3367 elif((httpheaderout
.get("Content-Encoding")=="lzma" or httpheaderout
.get("Content-Encoding")=="xz") and havelzma
):
3369 returnval_content
= lzma
.decompress(returnval_content
);
3370 except zstandard
.error
:
3372 elif(httpheaderout
.get("Content-Encoding")=="bzip2"):
3374 returnval_content
= bz2
.decompress(returnval_content
);
3375 except zstandard
.error
:
3377 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
, 'HTTPLib': "urllib3"};
3378 geturls_text
.close();
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Redefines the urllib3 entry point to delegate to the plain urllib
        implementation and returns its result.
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
3387 def download_from_url_file_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
3388 global geturls_download_sleep
, havezstd
, havebrotli
, tmpfileprefix
, tmpfilesuffix
;
3389 exec_time_start
= time
.time();
3390 myhash
= hashlib
.new("sha1");
3391 if(sys
.version
[0]=="2"):
3392 myhash
.update(httpurl
);
3393 myhash
.update(str(buffersize
));
3394 myhash
.update(str(exec_time_start
));
3395 if(sys
.version
[0]>="3"):
3396 myhash
.update(httpurl
.encode('utf-8'));
3397 myhash
.update(str(buffersize
).encode('utf-8'));
3398 myhash
.update(str(exec_time_start
).encode('utf-8'));
3399 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
3401 sleep
= geturls_download_sleep
;
3404 pretmpfilename
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
3405 if(not pretmpfilename
):
3407 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
3408 tmpfilename
= f
.name
;
3410 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3411 except AttributeError:
3413 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3418 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': pretmpfilename
.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename
.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3419 f
.write(pretmpfilename
.get('Content'));
3421 exec_time_end
= time
.time();
3422 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
3423 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'FilesizeAlt': {'IEC': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "IEC"), 'SI': get_readable_size(os
.path
.getsize(tmpfilename
), 2, "SI")}, 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed.

        Redefines the urllib3 file-download entry point to delegate to the
        plain urllib implementation (forwarding `ranges`) and returns its
        result.
        Fix: the original discarded the delegated result instead of
        returning it, so callers always received None.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
3432 def download_from_url_to_file_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3433 global geturls_download_sleep
, havezstd
, havebrotli
;
3435 sleep
= geturls_download_sleep
;
3438 if(not outfile
=="-"):
3439 outpath
= outpath
.rstrip(os
.path
.sep
);
3440 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
3441 if(not os
.path
.exists(outpath
)):
3442 os
.makedirs(outpath
);
3443 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
3445 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
3447 pretmpfilename
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3448 if(not pretmpfilename
):
3450 tmpfilename
= pretmpfilename
.get('Filename');
3451 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3453 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
3454 exec_time_start
= time
.time();
3455 shutil
.move(tmpfilename
, filepath
);
3457 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3458 except AttributeError:
3460 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3465 exec_time_end
= time
.time();
3466 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
3467 if(os
.path
.exists(tmpfilename
)):
3468 os
.remove(tmpfilename
);
3469 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3471 pretmpfilename
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3472 tmpfilename
= pretmpfilename
.get('Filename');
3473 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3476 exec_time_start
= time
.time();
3477 with
open(tmpfilename
, 'rb') as ft
:
3480 databytes
= ft
.read(buffersize
[1]);
3481 if not databytes
: break;
3482 datasize
= len(databytes
);
3483 fulldatasize
= datasize
+ fulldatasize
;
3486 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3487 downloaddiff
= fulldatasize
- prevdownsize
;
3488 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3489 prevdownsize
= fulldatasize
;
3492 fdata
= f
.getvalue();
3495 os
.remove(tmpfilename
);
3496 exec_time_end
= time
.time();
3497 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
3498 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation with the same contract (same parameters, same
        File/Content result dict)."""
        # BUG FIX: the old positional call passed
        # (..., postdata, buffersize, outfile, outpath, sleep, timeout),
        # which slotted buffersize into outfile and silently dropped ranges
        # (the sibling to_file functions all take postdata, outfile, outpath,
        # ranges, buffersize, sleep, timeout); it also never returned the
        # result.  Keyword arguments make the mapping explicit.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        return returnval;
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with mechanize and return the response as a dict.

    The dict carries 'Type' ("Content"), 'Content' (decoded body bytes),
    'Contentsize', 'ContentsizeAlt', 'Headers', 'Version', 'Method',
    'HeadersSent', 'URL', 'Code', 'Reason' and 'HTTPLib' ("mechanize").
    Returns False on URL/timeout errors.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUG FIX: was httpuseragent.update({...}) — httpuseragent is a
            # string and has no .update(); the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUG FIX: was httpuseragent.update({...}) — same as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline user:pass in the URL becomes an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = mechanize.Browser();
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    geturls_opener.set_cookiejar(httpcookie);
    geturls_opener.set_handle_robots(False);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata);
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = geturls_opener.open(httpurl);
    except mechanize.HTTPError as geturls_text_error:
        # HTTP error responses still carry headers and a body; keep them.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.code;
    httpcodereason = geturls_text.msg;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    reqhead = geturls_opener.request;
    httpheadersentout = reqhead.header_items();
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # On Python 2 the httplib message object needs copying into a real dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode the body according to Content-Encoding; on a
    # decode failure the raw bytes are kept (best-effort, as before).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUG FIX: was "except zstandard.error" — wrong module; lzma
            # raises LZMAError here.
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUG FIX: was "except zstandard.error"; bz2.decompress raises
            # OSError/ValueError on invalid data.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"};
    geturls_text.close();
    return returnval;
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the
        urllib implementation with identical parameters and result shape."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        # BUG FIX: the delegated result was computed but never returned.
        return returnval;
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via mechanize into a uniquely named temp file.

    Returns a 'File'-type result dict ('Filename', 'Filesize', 'Headers',
    'DownloadTime', ...) or False when the underlying download fails.
    The caller is responsible for removing the temp file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    # BUG FIX: timestamps are now applied AFTER the content is written and
    # the handle closed; previously os.utime() ran first and the subsequent
    # write reset the mtime, defeating the Last-Modified preservation.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
    except AttributeError:
        # Python 2 lacks parsedate_to_datetime; fall back to strptime.
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except ValueError:
            pass;
    except ValueError:
        pass;
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # NOTE(review): elapsed times are computed as start-end (negative)
    # throughout this file; preserved for output compatibility.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the
        urllib file-download implementation (same parameters, same result)."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        # BUG FIX: the delegated result was computed but never returned.
        return returnval;
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via mechanize to outpath/outfile, or — when outfile
    is "-" — into memory.

    Returns a 'File' result dict (file branch) or a 'Content' result dict
    (memory branch); False on invalid target paths or failed downloads.
    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    if(not outfile=="-"):
        # File branch: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the final file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: 'Method' appeared twice in this dict; only the later value
        # (httpmethod) ever survived, so keep that single entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # Memory branch: stream the downloaded temp file back into a buffer.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUG FIXES: 'HeadersSent' was the literal list ['HeadersSent'] instead
        # of the headers actually sent, and 'Method' appeared twice (the later
        # httpmethod value won; kept as the single entry).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the
        urllib implementation (same parameters, same File/Content result)."""
        # BUG FIX: the old positional call mis-slotted buffersize into the
        # outfile position and dropped ranges, and never returned the result;
        # keyword arguments make the mapping explicit.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
        return returnval;
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with pycurl and return the response as a dict.

    Result dict shape matches the other download_from_url_with_* backends
    ('Type' "Content", 'Content', 'Contentsize', 'Headers', 'Code', ...,
    'HTTPLib' "pycurl").  Returns False on timeout/name-resolution errors.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUG FIX: was httpuseragent.update({...}) — httpuseragent is a
            # string and has no .update(); the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUG FIX: was httpuseragent.update({...}) — same as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline user:pass in the URL becomes an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        # pycurl wants headers as a list of "Name: value" strings.
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
    geturls_opener.addheaders = httpheaders;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    retrieved_body = BytesIO();
    retrieved_headers = BytesIO();
    try:
        if(httpmethod=="GET"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        elif(httpmethod=="POST"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.setopt(geturls_text.POST, True);
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        retrieved_headers.seek(0);
        if(sys.version[0]=="2"):
            pycurlhead = retrieved_headers.read();
        if(sys.version[0]>="3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8');
        # First header line carries "HTTP/<version> <code> [reason]".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
        retrieved_body.seek(0);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except ValueError:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
    httpversionout = pyhttpverinfo[0];
    httpmethodout = httpmethod;
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
    httpheaderout = pycurlheadersout;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        # On Python 2 the header mapping needs copying into a real dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode the body according to Content-Encoding.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUG FIX: was "except zstandard.error" — wrong module.
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUG FIX: was "except zstandard.error"; bz2.decompress raises
            # OSError/ValueError on invalid data.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"};
    geturls_text.close();
    return returnval;
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate to the urllib
    implementation with identical parameters and result shape."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # BUG FIX: the delegated result was computed but never returned.
    return returnval;
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via pycurl into a uniquely named temp file.

    Returns a 'File'-type result dict ('Filename', 'Filesize', 'Headers',
    'DownloadTime', ...) or False when the underlying download fails.
    The caller is responsible for removing the temp file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    # BUG FIX: timestamps are now applied AFTER the content is written and
    # the handle closed; previously os.utime() ran first and the subsequent
    # write reset the mtime, defeating the Last-Modified preservation.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
    except AttributeError:
        # Python 2 lacks parsedate_to_datetime; fall back to strptime.
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except ValueError:
            pass;
    except ValueError:
        pass;
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate to the urllib
    file-download implementation (same parameters, same result shape)."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    # BUG FIX: the delegated result was computed but never returned.
    return returnval;
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via pycurl to outpath/outfile, or — when outfile is
    "-" — into memory.

    Returns a 'File' result dict (file branch) or a 'Content' result dict
    (memory branch); False on invalid target paths or failed downloads.
    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    if(not outfile=="-"):
        # File branch: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the final file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: 'Method' appeared twice in this dict; only the later value
        # (httpmethod) ever survived, so keep that single entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # Memory branch: stream the downloaded temp file back into a buffer.
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUG FIX: 'Method' appeared twice in this dict; only the later value
        # (httpmethod) ever survived, so keep that single entry.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate the whole
    download-to-file operation to the urllib implementation and return
    its result dict (or False on failure).

    NOTE(review): ``ranges`` is accepted for signature compatibility but is
    not forwarded to the urllib variant -- confirm that is intentional."""
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    # Restored: the mangled source dropped the trailing return of the result.
    return returnval;
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/2 and return a result dict
        with keys Type/Content/Contentsize/ContentsizeAlt/Headers/Version/
        Method/HeadersSent/URL/Code/Reason/HTTPLib, or False on a network
        error.  The response body is transparently decompressed according to
        its Content-Encoding header when the matching codec is available."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUG FIX: original called httpuseragent.update(...) -- a str has
                # no .update(), so this branch raised AttributeError whenever the
                # header was not already present.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUG FIX: same defect as above -- was httpuseragent.update(...).
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
                geturls_text.perform();
            else:
                # Any other method falls back to a plain GET-style request.
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # First header line is the status line, e.g. "HTTP/2 200 OK".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the mapping so keys() is an indexable list.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort decompression: on a codec error the raw bytes are kept.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUG FIX: original caught zstandard.error here; an lzma failure
                # never raises that, and zstandard may not even be importable.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUG FIX: original caught zstandard.error for a bz2 failure.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"};
        geturls_text.close();
        return returnval;
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback when pycurl is unavailable: delegate the fetch to the
    urllib implementation and return its result dict (or False)."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # Restored: the mangled source dropped the trailing return of the result.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # This pycurl build has no HTTP/2 support: the "pycurl2" entry point is
    # redefined to delegate to the plain urllib implementation.
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback for pycurl builds without HTTP/2: delegate to
        download_from_url_with_urllib and return its result dict."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Restored: the mangled source dropped the trailing return of the result.
        return returnval;
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl over HTTP/2 (via download_from_url_with_pycurl2)
        into a uniquely named temporary file and return the result dict with
        'Filename', 'Filesize' and timing info added, or False on failure.
        The caller is responsible for removing the temporary file."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # SHA-1 of URL + buffer size + start time gives a unique tmp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Mirror the server's Last-Modified time onto the temporary file;
            # fall back to manual parsing, and ignore unparsable values.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback when pycurl is unavailable: delegate the download-to-tempfile
    operation to the urllib implementation and return its result dict."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # Restored: the mangled source dropped the trailing return of the result.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl present but without HTTP/2: fall back to the HTTP/1.x pycurl path.
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback for pycurl builds without HTTP/2: delegate to
        download_from_url_file_with_pycurl and return its result dict."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Restored: the mangled source dropped the trailing return of the result.
        return returnval;
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl over HTTP/2 and deliver it either to a file
        (outfile != "-": the temp download is moved to outpath/outfile and a
        'File' dict is returned) or in memory (outfile == "-": the temp file
        is read back, deleted, and a 'Content' dict is returned).  Returns
        False when the destination is invalid or the fetch failed.
        buffersize is [download, copy] chunk sizes."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                # Destination directory path points at an existing file.
                return False;
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                # Destination file path points at an existing directory.
                return False;
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                return False;
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Propagate the server's Last-Modified time to the final file;
            # ignore headers that cannot be parsed.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # BUG FIX: the original dict listed 'Method' twice
            # (pretmpfilename.get('Method') then httpmethod); the first entry
            # was silently discarded, so only httpmethod is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            fulldatasize = 0;
            prevdownsize = 0;
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO();
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
                f.close();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # BUG FIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback when pycurl is unavailable: delegate the download-to-file
    operation to the urllib implementation and return its result dict.

    NOTE(review): ``ranges`` is accepted for signature compatibility but is
    not forwarded -- confirm that is intentional."""
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    # Restored: the mangled source dropped the trailing return of the result.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl present but without HTTP/2: fall back to the HTTP/1.x pycurl path.
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback for pycurl builds without HTTP/2: delegate to
        download_from_url_to_file_with_pycurl and return its result dict.

        NOTE(review): the positional argument order passed below (buffersize
        before outfile/outpath, ranges omitted) should be verified against
        download_from_url_to_file_with_pycurl's signature -- it matches the
        urllib-style ordering, not the pycurl to-file signature visible
        elsewhere in this file."""
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Restored: the mangled source dropped the trailing return of the result.
        return returnval;
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/3 and return a result dict
        with keys Type/Content/Contentsize/ContentsizeAlt/Headers/Version/
        Method/HeadersSent/URL/Code/Reason/HTTPLib, or False on a network
        error.  Identical to the HTTP/2 variant except for the negotiated
        protocol version and the reported HTTPLib tag."""
        global geturls_download_sleep, havezstd, havebrotli;
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        urlparts = urlparse.urlparse(httpurl);
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders);
        httpheaders = fix_header_names(httpheaders);
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent;
            else:
                # BUG FIX: original called httpuseragent.update(...) -- a str has
                # no .update(), so this branch raised AttributeError whenever the
                # header was not already present.
                httpheaders.update({'User-Agent': httpuseragent});
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer;
            else:
                # BUG FIX: same defect as above -- was httpuseragent.update(...).
                httpheaders.update({'Referer': httpreferer});
        if(urlparts.username is not None or urlparts.password is not None):
            # Credentials embedded in the URL become an HTTP Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
        geturls_opener.addheaders = httpheaders;
        time.sleep(sleep);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
                geturls_text.perform();
            else:
                # Any other method falls back to a plain GET-style request.
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            # First header line is the status line, e.g. "HTTP/3 200".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
        if(sys.version[0]=="2"):
            # Python 2: rebuild the mapping so keys() is an indexable list.
            try:
                prehttpheaderout = httpheaderout;
                httpheaderkeys = httpheaderout.keys();
                imax = len(httpheaderkeys);
                ic = 0;
                httpheaderout = {};
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                    ic += 1;
            except AttributeError:
                pass;
        httpheaderout = fix_header_names(httpheaderout);
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
        httpheadersentout = fix_header_names(httpheadersentout);
        downloadsize = httpheaderout.get('Content-Length');
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
        fulldatasize = 0;
        prevdownsize = 0;
        log.info("Downloading URL "+httpurl);
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                strbuf.write(databytes);
            strbuf.seek(0);
            returnval_content = strbuf.read();
        # Best-effort decompression: on a codec error the raw bytes are kept.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content);
            except zlib.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content);
            except brotli.error:
                pass;
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content);
            except zstandard.error:
                pass;
        elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content);
            except lzma.LZMAError:
                # BUG FIX: original caught zstandard.error here; an lzma failure
                # never raises that, and zstandard may not even be importable.
                pass;
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content);
            except (OSError, ValueError):
                # BUG FIX: original caught zstandard.error for a bz2 failure.
                pass;
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"};
        geturls_text.close();
        return returnval;
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback when pycurl is unavailable: delegate the fetch to the
    urllib implementation and return its result dict (or False)."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # Restored: the mangled source dropped the trailing return of the result.
    return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl supports HTTP/2 but not HTTP/3: downgrade pycurl3 to pycurl2.
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback for pycurl builds without HTTP/3: delegate to
        download_from_url_with_pycurl2 and return its result dict."""
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Restored: the mangled source dropped the trailing return of the result.
        return returnval;
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl supports neither HTTP/3 nor HTTP/2: downgrade pycurl3 to plain pycurl.
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback for pycurl builds without HTTP/3 or HTTP/2: delegate to
        download_from_url_with_pycurl and return its result dict."""
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Restored: the mangled source dropped the trailing return of the result.
        return returnval;
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl over HTTP/3 (via download_from_url_with_pycurl3)
        into a uniquely named temporary file and return the result dict with
        'Filename', 'Filesize' and timing info added, or False on failure.
        The caller is responsible for removing the temporary file."""
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
        exec_time_start = time.time();
        # SHA-1 of URL + buffer size + start time gives a unique tmp-file suffix.
        myhash = hashlib.new("sha1");
        if(sys.version[0]=="2"):
            myhash.update(httpurl);
            myhash.update(str(buffersize));
            myhash.update(str(exec_time_start));
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'));
            myhash.update(str(buffersize).encode('utf-8'));
            myhash.update(str(exec_time_start).encode('utf-8'));
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
        if(sleep<0):
            sleep = geturls_download_sleep;
        if(timeout<=0):
            timeout = 10;
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        if(not pretmpfilename):
            return False;
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Mirror the server's Last-Modified time onto the temporary file;
            # fall back to manual parsing, and ignore unparsable values.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
            f.write(pretmpfilename.get('Content'));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
        return returnval;
if(not havepycurl):
    # BUGFIX: this fallback was unguarded and unconditionally clobbered the
    # pycurl implementation defined above; guard it like the other fallbacks.
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib variant."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 unsupported by this libcurl: delegate to the HTTP/2 variant."""
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Neither HTTP/3 nor HTTP/2 available: delegate to the plain pycurl variant."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl with pycurl (HTTP/3).

        When outfile is a name, move the temp download to outpath/outfile and
        return a 'File' dict; when outfile=="-", return the bytes in a
        'Content' dict. Returns False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False  # outpath exists but is a plain file
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False  # target name is taken by a directory
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            try:
                # Carry the server's Last-Modified stamp over to the target.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            # BUGFIX: elapsed time is end - start (was start - end, negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # BUGFIX: dict literal had a duplicate 'Method' key; the effective
            # (last) value httpmethod is kept.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with BytesIO() as f:
                with open(tmpfilename, 'rb') as ft:
                    while True:
                        databytes = ft.read(buffersize[1])
                        if not databytes:
                            break
                        datasize = len(databytes)
                        fulldatasize = datasize + fulldatasize
                        percentage = ""
                        if(downloadsize>0):  # avoid ZeroDivisionError on empty files
                            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                        downloaddiff = fulldatasize - prevdownsize
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                        prevdownsize = fulldatasize
                        f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
if(not havepycurl):
    # BUGFIX: fallback was unguarded (clobbering the pycurl implementation)
    # and passed arguments in the wrong positional order (buffersize landed
    # in the outfile slot and ranges was dropped).
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: delegate to the urllib variant."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # BUGFIX: this block defined download_from_url_to_file_with_pycurl2 calling
    # itself (infinite recursion). Per the sibling file-variant fallbacks it
    # must define the ...pycurl3 alias delegating to the HTTP/2 implementation,
    # and pass the arguments in the callee's declared order.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/3 unsupported by this libcurl: delegate to the HTTP/2 variant."""
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    # BUGFIX: this block defined download_from_url_to_file_with_pycurl calling
    # itself (infinite recursion). It must define the ...pycurl3 alias
    # delegating to the plain pycurl implementation, with arguments in the
    # callee's declared order.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Neither HTTP/3 nor HTTP/2 available: delegate to plain pycurl."""
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
def download_file_from_ftp_file(url):
    """Fetch url (ftp:// or ftps://) and return its body as a rewound BytesIO.

    Returns False for non-FTP schemes or on connect errors.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUGFIX: error path logged undefined name `httpurl` (NameError).
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()  # secure the data channel on FTPS
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Fetch url via FTP and return the raw bytes, or False on failure."""
    ftpfile = download_file_from_ftp_file(url)
    # BUGFIX: download_file_from_ftp_file returns False on failure; calling
    # .read() on it raised AttributeError.
    if(not ftpfile):
        return False
    return ftpfile.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over FTP and return a 'Content' result dict, or False."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — updated the wrong object
            # (and would crash, since httpuseragent is a string).
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same wrong-object update for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    downloadsize = None  # FTP gives no Content-Length equivalent here
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):  # avoid ZeroDivisionError when size unknown
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUGFIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over FTP into a named temporary file.

    Returns a 'File' result dict or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from url + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Carry the server's Last-Modified stamp over (None for FTP; the
            # AttributeError fallback below handles that).
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over FTP to outpath/outfile ('File' dict) or, when
    outfile=="-", into memory ('Content' dict). Returns False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath exists but is a plain file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # target name is taken by a directory
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: dict literal had a duplicate 'Method' key; the effective
        # (last) value None is kept — FTP transfers have no HTTP method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):  # avoid ZeroDivisionError on empty files
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to url (ftp:// or ftps://).

    Returns the rewound ftpfile on success, False otherwise.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUGFIX: error path logged undefined name `httpurl` (NameError).
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()  # secure the data channel on FTPS
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the bytes ftpstring to url via FTP; return the uploaded buffer
    (or False on failure)."""
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    # BUGFIX: the result was never returned (function fell off the end).
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch url (sftp://) via paramiko and return its body as a rewound
    BytesIO; False for non-SFTP schemes or on connect errors."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: error path logged undefined name `httpurl` (NameError).
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        """Stub: paramiko is unavailable, so SFTP downloads are unsupported."""
        return False
def download_file_from_sftp_string(url):
    """Fetch url via SFTP and return the raw bytes, or False on failure."""
    sftpfile = download_file_from_sftp_file(url)
    # BUGFIX: download_file_from_sftp_file returns False on failure; calling
    # .read() on it raised AttributeError.
    if(not sftpfile):
        return False
    return sftpfile.read()
if(not haveparamiko):
    # BUGFIX: this stub was named download_file_from_ftp_string, which would
    # clobber the working FTP helper whenever paramiko is missing; the
    # surrounding not-haveparamiko stubs show the SFTP string helper was meant.
    def download_file_from_sftp_string(url):
        """Stub: paramiko is unavailable, so SFTP downloads are unsupported."""
        return False
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP and return a 'Content' result dict, or False."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — updated the wrong object
            # (and would crash, since httpuseragent is a string).
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same wrong-object update for the Referer header.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    downloadsize = None  # SFTP gives no Content-Length equivalent here
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):  # avoid ZeroDivisionError when size unknown
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUGFIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stub: paramiko is unavailable, so SFTP downloads are unsupported."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP into a named temporary file.

    Returns a 'File' result dict or False when the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from url + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Carry the server's Last-Modified stamp over (None for SFTP; the
            # AttributeError fallback below handles that).
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stub: paramiko is unavailable, so SFTP downloads are unsupported."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP to outpath/outfile ('File' dict) or, when
    outfile=="-", into memory ('Content' dict). Returns False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath exists but is a plain file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # target name is taken by a directory
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: dict literal had a duplicate 'Method' key; the effective
        # (last) value None is kept — SFTP transfers have no HTTP method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):  # avoid ZeroDivisionError on empty files
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub installed when paramiko is not available.

        Mirrors the real implementation's signature and always returns False.
        NOTE(review): the stub body was lost in extraction; `return False` is
        reconstructed from the identical have-X fallback pattern used throughout
        this file — confirm against upstream.
        """
        return False;
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object *sftpfile* to the path named by an sftp:// URL
    using paramiko.

    Returns the file object (rewound to the start) on success, False on failure
    (non-sftp scheme, connection/authentication error, DNS failure, timeout).
    NOTE(review): several dropped lines (default port, `else:` branches, `try:`,
    and the `return False` exits) are reconstructed from the parallel FTP/pysftp
    helpers in this file — confirm against upstream.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    # Plain HTTP(S) URLs cannot be uploaded to over SFTP.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port when the URL names none
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUG FIX: original logged undefined name `httpurl`; the parameter is `url`.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;
if(not haveparamiko):
    # Fallback stub installed when paramiko is not available.
    # NOTE(review): the `if(not haveparamiko):` guard and `return False` body were
    # lost in extraction and are reconstructed from this file's fallback pattern.
    def upload_file_to_sftp_file(sftpfile, url):
        return False;
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the bytes *sftpstring* to the path named by an sftp:// URL.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_sftp_file.
    Returns that call's result (the rewound file object, or False on failure).
    """
    sftpfileo = BytesIO(sftpstring);
    # BUG FIX: original called undefined `upload_file_to_sftp_files` with the
    # undefined name `ftpfileo`; both were typos.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
if(not haveparamiko):
    # Fallback stub installed when paramiko is not available.
    # NOTE(review): guard and body reconstructed (lost in extraction); the stub's
    # (url)-only signature differs from the real (sftpstring, url) version but is
    # kept byte-for-byte to preserve the existing interface — confirm upstream.
    def upload_file_to_sftp_string(url):
        return False;
def download_file_from_pysftp_file(url):
    """Download the path named by an sftp:// URL via pysftp into a BytesIO.

    Returns the BytesIO (rewound to the start) on success, False on failure.
    NOTE(review): dropped lines (default port, `else:` branches, `try:`, early
    `return False` exits) are reconstructed from the sibling helpers — confirm
    against upstream.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port when the URL names none
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUG FIX: the connection object was discarded and the code then used an
        # undefined `ssh` (`sftp = ssh.open_sftp()`), a leftover from the paramiko
        # version; pysftp.Connection IS the SFTP client here.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUG FIX: original logged undefined name `httpurl`; the parameter is `url`.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
if(not havepysftp):
    # Fallback stub installed when pysftp is not available.
    # NOTE(review): guard and `return False` body reconstructed (lost in
    # extraction) — confirm the flag name `havepysftp` against upstream.
    def download_file_from_pysftp_file(url):
        return False;
def download_file_from_pysftp_string(url):
    """Download the path named by an sftp:// URL via pysftp and return its bytes.

    Returns the file content as bytes, or False when the download fails.
    """
    sftpfile = download_file_from_pysftp_file(url);
    # BUG FIX: download_file_from_pysftp_file returns False on failure, so calling
    # .read() on it unconditionally raised AttributeError; propagate False instead.
    if(not sftpfile):
        return False;
    return sftpfile.read();
if(not havepysftp):
    # Fallback stub installed when pysftp is not available.
    # NOTE(review): guard and body reconstructed (lost in extraction). The name
    # `download_file_from_ftp_string` (not *_pysftp_*) matches the visible source
    # and is kept to preserve the existing interface — confirm against upstream.
    def download_file_from_ftp_string(url):
        return False;
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl (an sftp:// URL) via pysftp and return a result dict.

    The dict carries Type/Content/Contentsize plus placeholder HTTP-style fields
    (Headers/Version/Method/... are None — SFTP has no such metadata).
    Returns False when the underlying download fails.
    NOTE(review): dropped control-flow lines (sleep/timeout defaults, `while`,
    the size guard, `strbuf.seek(0)`, `return` statements) are reconstructed from
    the parallel FTP/SFTP implementations — confirm against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    time.sleep(sleep);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        return False;
    # SFTP reports no content length up front; treat unknown size as 0.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # BUG FIX: downloadsize is always 0 here, so the unguarded division
            # raised ZeroDivisionError on every chunk; only compute a percentage
            # when a real total is known.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # BUG FIX: without rewinding, strbuf.read() after the writes returns b"".
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
if(not havepysftp):
    # Fallback stub installed when pysftp is not available; always returns False.
    # NOTE(review): guard and body reconstructed (lost in extraction).
    def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        return False;
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via pysftp and spill the content into a named temporary file.

    Returns a result dict whose 'Filename' is the temp file path (caller removes
    it), or False when the underlying download fails.
    NOTE(review): dropped lines (sleep/timeout defaults, `try:` nesting, `return`
    statements) reconstructed from the parallel SFTP implementation — confirm
    against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # BUG FIX: the call previously passed `httpuseragent` and `httpreferer`,
    # neither of which is a parameter of this function (NameError) nor of the
    # 8-parameter download_from_url_with_pysftp.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Best-effort: stamp the temp file with the server's Last-Modified time.
        # BUG FIX: pysftp results carry Headers=None, so the original chain
        # .get('Headers').get('Last-Modified') raised AttributeError and the
        # fallback strptime raised TypeError past the handlers; swallow all of
        # the "no usable timestamp" failure modes instead of crashing.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (AttributeError, TypeError, ValueError):
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    # BUG FIX: elapsed time was computed as start - end (always negative).
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havepysftp):
    # Fallback stub installed when pysftp is not available; always returns False.
    # NOTE(review): guard and body reconstructed (lost in extraction).
    def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        return False;
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl (an sftp:// URL, via pysftp) to outpath/outfile.

    With outfile=="-" the content is returned in-memory instead of written to a
    destination file.  Returns a result dict on success, False on failure.
    NOTE(review): dropped control-flow lines (`return False` exits, `while`,
    the percentage guard, the BytesIO setup in the "-" branch) are reconstructed
    from the parallel FTP/SFTP implementations — confirm against upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # Download to a temp file, then move it to the requested destination.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the original dict listed 'Method' twice (real value, then
        # 'Method': None); the later key always clobbered the real value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
    if(outfile=="-"):
        # Download to a temp file, copy it back into memory, then delete it.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                # Guard against a zero-byte temp file (division by zero).
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUG FIX: same duplicate-'Method' and negative-elapsed-time defects as above.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        return returnval;
if(not havepysftp):
    # Fallback stub installed when pysftp is not available; always returns False.
    # NOTE(review): guard and body reconstructed (lost in extraction).
    def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        return False;
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object *sftpfile* to the path named by an sftp:// URL
    using pysftp.

    Returns the file object (rewound to the start) on success, False on failure.
    NOTE(review): dropped lines (default port, `else:` branches, `try:`, early
    `return False` exits) are reconstructed from the sibling helpers — confirm
    against upstream.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port when the URL names none
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUG FIX: the connection object was discarded and the code then used an
        # undefined `ssh` (`sftp = ssh.open_sftp()`), a leftover from the paramiko
        # version; pysftp.Connection IS the SFTP client here.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUG FIX: original logged undefined name `httpurl`; the parameter is `url`.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
if(not havepysftp):
    # Fallback stub installed when pysftp is not available.
    # NOTE(review): guard and `return False` body reconstructed (lost in extraction).
    def upload_file_to_pysftp_file(sftpfile, url):
        return False;
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the bytes *sftpstring* to the path named by an sftp:// URL via pysftp.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_pysftp_file.
    Returns that call's result (the rewound file object, or False on failure).
    """
    sftpfileo = BytesIO(sftpstring);
    # BUG FIX: original called undefined `upload_file_to_pysftp_files` with the
    # undefined name `ftpfileo`; both were typos.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
5468 def upload_file_to_pysftp_string(url
):