4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
62 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
102 if(sys
.version
[0]=="2"):
104 from io
import StringIO
, BytesIO
;
107 from cStringIO
import StringIO
;
108 from cStringIO
import StringIO
as BytesIO
;
110 from StringIO
import StringIO
;
111 from StringIO
import StringIO
as BytesIO
;
112 # From http://python-future.org/compatible_idioms.html
113 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
114 from urllib
import urlencode
;
115 from urllib
import urlopen
as urlopenalt
;
116 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
117 import urlparse
, cookielib
;
118 from httplib
import HTTPConnection
, HTTPSConnection
;
119 if(sys
.version
[0]>="3"):
120 from io
import StringIO
, BytesIO
;
121 # From http://python-future.org/compatible_idioms.html
122 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
123 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
124 from urllib
.error
import HTTPError
, URLError
;
125 import urllib
.parse
as urlparse
;
126 import http
.cookiejar
as cookielib
;
127 from http
.client
import HTTPConnection
, HTTPSConnection
;
129 __program_name__
= "PyWWW-Get";
130 __program_alt_name__
= "PyWWWGet";
131 __program_small_name__
= "wwwget";
132 __project__
= __program_name__
;
133 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
134 __version_info__
= (2, 0, 2, "RC 1", 1);
135 __version_date_info__
= (2023, 10, 5, "RC 1", 1);
136 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
137 __revision__
= __version_info__
[3];
138 __revision_id__
= "$Id$";
139 if(__version_info__
[4] is not None):
140 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
141 if(__version_info__
[4] is None):
142 __version_date_plusrc__
= __version_date__
;
143 if(__version_info__
[3] is not None):
144 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
145 if(__version_info__
[3] is None):
146 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
148 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
150 pytempdir
= tempfile
.gettempdir();
152 PyBitness
= platform
.architecture();
153 if(PyBitness
=="32bit" or PyBitness
=="32"):
155 elif(PyBitness
=="64bit" or PyBitness
=="64"):
160 compression_supported_list
= ['identity', 'gzip', 'deflate', 'bzip2'];
162 compression_supported_list
.append('br');
164 compression_supported_list
.append('zstd');
166 compression_supported_list
.append('lzma');
167 compression_supported_list
.append('xz');
168 compression_supported
= ', '.join(compression_supported_list
);
170 geturls_cj
= cookielib
.CookieJar();
171 windowsNT4_ua_string
= "Windows NT 4.0";
172 windowsNT4_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "4.0.0"};
173 windows2k_ua_string
= "Windows NT 5.0";
174 windows2k_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.0.0"};
175 windowsXP_ua_string
= "Windows NT 5.1";
176 windowsXP_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.1.0"};
177 windowsXP64_ua_string
= "Windows NT 5.2; Win64; x64";
178 windowsXP64_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "5.1.0"};
179 windows7_ua_string
= "Windows NT 6.1; Win64; x64";
180 windows7_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.1.0"};
181 windows8_ua_string
= "Windows NT 6.2; Win64; x64";
182 windows8_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.2.0"};
183 windows81_ua_string
= "Windows NT 6.3; Win64; x64";
184 windows81_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.3.0"};
185 windows10_ua_string
= "Windows NT 10.0; Win64; x64";
186 windows10_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "10.0.0"};
187 windows11_ua_string
= "Windows NT 11.0; Win64; x64";
188 windows11_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "11.0.0"};
189 geturls_ua_firefox_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:109.0) Gecko/20100101 Firefox/117.0";
190 geturls_ua_seamonkey_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
191 geturls_ua_chrome_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
192 geturls_ua_chromium_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
193 geturls_ua_palemoon_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
194 geturls_ua_opera_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
195 geturls_ua_vivaldi_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
196 geturls_ua_internet_explorer_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; Trident/7.0; rv:11.0) like Gecko";
197 geturls_ua_microsoft_edge_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
198 geturls_ua_pywwwget_python
= "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname
=__project__
, prover
=__version__
, prourl
=__project_url__
);
199 if(platform
.python_implementation()!=""):
200 py_implementation
= platform
.python_implementation();
201 if(platform
.python_implementation()==""):
202 py_implementation
= "Python";
203 geturls_ua_pywwwget_python_alt
= "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver
=platform
.system()+" "+platform
.release(), archtype
=platform
.machine(), prourl
=__project_url__
, pyimp
=py_implementation
, pyver
=platform
.python_version(), proname
=__project__
, prover
=__version__
);
204 geturls_ua_googlebot_google
= "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
205 geturls_ua_googlebot_google_old
= "Googlebot/2.1 (+http://www.google.com/bot.html)";
206 geturls_ua
= geturls_ua_firefox_windows7
;
207 geturls_headers_firefox_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
208 geturls_headers_seamonkey_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
209 geturls_headers_chrome_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
210 geturls_headers_chrome_windows7
.update(windows7_ua_addon
);
211 geturls_headers_chromium_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
212 geturls_headers_chromium_windows7
.update(windows7_ua_addon
);
213 geturls_headers_palemoon_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
214 geturls_headers_opera_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
215 geturls_headers_opera_windows7
.update(windows7_ua_addon
);
216 geturls_headers_vivaldi_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
217 geturls_headers_vivaldi_windows7
.update(windows7_ua_addon
);
218 geturls_headers_internet_explorer_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
219 geturls_headers_microsoft_edge_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
220 geturls_headers_microsoft_edge_windows7
.update(windows7_ua_addon
);
221 geturls_headers_pywwwget_python
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__
+"\";v=\""+str(__version__
)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation
+"\";v=\""+str(platform
.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__
), 'SEC-CH-UA-PLATFORM': ""+py_implementation
+"", 'SEC-CH-UA-ARCH': ""+platform
.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__
), 'SEC-CH-UA-BITNESS': str(PyBitness
)};
222 geturls_headers_pywwwget_python_alt
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__
+"\";v=\""+str(__version__
)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation
+"\";v=\""+str(platform
.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__
), 'SEC-CH-UA-PLATFORM': ""+py_implementation
+"", 'SEC-CH-UA-ARCH': ""+platform
.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__
), 'SEC-CH-UA-BITNESS': str(PyBitness
)};
223 geturls_headers_googlebot_google
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
224 geturls_headers_googlebot_google_old
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
225 geturls_headers
= geturls_headers_firefox_windows7
;
226 geturls_download_sleep
= 0;
228 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
229 if(outtype
=="print" and dbgenable
):
232 elif(outtype
=="log" and dbgenable
):
233 logging
.info(dbgtxt
);
235 elif(outtype
=="warning" and dbgenable
):
236 logging
.warning(dbgtxt
);
238 elif(outtype
=="error" and dbgenable
):
239 logging
.error(dbgtxt
);
241 elif(outtype
=="critical" and dbgenable
):
242 logging
.critical(dbgtxt
);
244 elif(outtype
=="exception" and dbgenable
):
245 logging
.exception(dbgtxt
);
247 elif(outtype
=="logalt" and dbgenable
):
248 logging
.log(dgblevel
, dbgtxt
);
250 elif(outtype
=="debug" and dbgenable
):
251 logging
.debug(dbgtxt
);
259 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
260 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
265 def add_url_param(url
, **params
):
267 parts
= list(urlparse
.urlsplit(url
));
268 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
270 parts
[n
]=urlencode(d
);
271 return urlparse
.urlunsplit(parts
);
273 os
.environ
["PATH"] = os
.environ
["PATH"] + os
.pathsep
+ os
.path
.dirname(os
.path
.realpath(__file__
)) + os
.pathsep
+ os
.getcwd();
def which_exec(execfile):
    """Search the directories listed in the PATH environment variable for
    an executable named *execfile*.

    Returns the full path of the first match found, or None when no
    directory on PATH contains the file.
    """
    # Split on os.pathsep (":" on POSIX, ";" on Windows) and join with
    # os.path.join instead of hard-coding ":" and "/"; this matches the
    # os.pathsep-based PATH manipulation performed at module level and
    # makes the lookup work on Windows as well.
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
279 def listize(varlist
):
287 newlistreg
.update({ilx
: varlist
[il
]});
288 newlistrev
.update({varlist
[il
]: ilx
});
291 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
294 def twolistize(varlist
):
304 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
305 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
306 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
307 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
310 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
311 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
312 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
315 def arglistize(proexec
, *varlist
):
319 newarglist
= [proexec
];
321 if varlist
[il
][0] is not None:
322 newarglist
.append(varlist
[il
][0]);
323 if varlist
[il
][1] is not None:
324 newarglist
.append(varlist
[il
][1]);
328 def fix_header_names(header_dict
):
329 if(sys
.version
[0]=="2"):
330 header_dict
= {k
.title(): v
for k
, v
in header_dict
.iteritems()};
331 if(sys
.version
[0]>="3"):
332 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
335 # hms_string by ArcGIS Python Recipes
336 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Return *sec_elapsed* (a duration in seconds) formatted as
    "H:MM:SS.ss", e.g. 3661.5 -> "1:01:01.50".
    """
    # int() truncates toward zero (matching the established behavior),
    # so the hour/minute parts are whole numbers while the seconds part
    # keeps its fractional remainder.
    full_hours = int(sec_elapsed / 3600)
    full_minutes = int((sec_elapsed % 3600) / 60)
    leftover_seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(full_hours, full_minutes, leftover_seconds)
343 # get_readable_size by Lipis
344 # http://stackoverflow.com/posts/14998888/revisions
345 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
347 if(unit
!="IEC" and unit
!="SI"):
350 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
351 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
354 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
355 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
360 if abs(bytes
) < unitsize
:
361 strformat
= "%3."+str(precision
)+"f%s";
362 pre_return_val
= (strformat
% (bytes
, unit
));
363 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
364 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
365 alt_return_val
= pre_return_val
.split();
366 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
369 strformat
= "%."+str(precision
)+"f%s";
370 pre_return_val
= (strformat
% (bytes
, "YiB"));
371 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
372 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
373 alt_return_val
= pre_return_val
.split();
374 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
377 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
379 usehashtypes
= usehashtypes
.lower();
380 getfilesize
= os
.path
.getsize(infile
);
381 return_val
= get_readable_size(getfilesize
, precision
, unit
);
383 hashtypelist
= usehashtypes
.split(",");
384 openfile
= open(infile
, "rb");
385 filecontents
= openfile
.read();
388 listnumend
= len(hashtypelist
);
389 while(listnumcount
< listnumend
):
390 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
391 hashtypelistup
= hashtypelistlow
.upper();
392 filehash
= hashlib
.new(hashtypelistup
);
393 filehash
.update(filecontents
);
394 filegethash
= filehash
.hexdigest();
395 return_val
.update({hashtypelistup
: filegethash
});
399 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
401 usehashtypes
= usehashtypes
.lower();
402 getfilesize
= len(instring
);
403 return_val
= get_readable_size(getfilesize
, precision
, unit
);
405 hashtypelist
= usehashtypes
.split(",");
407 listnumend
= len(hashtypelist
);
408 while(listnumcount
< listnumend
):
409 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
410 hashtypelistup
= hashtypelistlow
.upper();
411 filehash
= hashlib
.new(hashtypelistup
);
412 if(sys
.version
[0]=="2"):
413 filehash
.update(instring
);
414 if(sys
.version
[0]>="3"):
415 filehash
.update(instring
.encode('utf-8'));
416 filegethash
= filehash
.hexdigest();
417 return_val
.update({hashtypelistup
: filegethash
});
421 def http_status_to_reason(code
):
424 101: 'Switching Protocols',
429 203: 'Non-Authoritative Information',
431 205: 'Reset Content',
432 206: 'Partial Content',
434 208: 'Already Reported',
436 300: 'Multiple Choices',
437 301: 'Moved Permanently',
442 307: 'Temporary Redirect',
443 308: 'Permanent Redirect',
446 402: 'Payment Required',
449 405: 'Method Not Allowed',
450 406: 'Not Acceptable',
451 407: 'Proxy Authentication Required',
452 408: 'Request Timeout',
455 411: 'Length Required',
456 412: 'Precondition Failed',
457 413: 'Payload Too Large',
459 415: 'Unsupported Media Type',
460 416: 'Range Not Satisfiable',
461 417: 'Expectation Failed',
462 421: 'Misdirected Request',
463 422: 'Unprocessable Entity',
465 424: 'Failed Dependency',
466 426: 'Upgrade Required',
467 428: 'Precondition Required',
468 429: 'Too Many Requests',
469 431: 'Request Header Fields Too Large',
470 451: 'Unavailable For Legal Reasons',
471 500: 'Internal Server Error',
472 501: 'Not Implemented',
474 503: 'Service Unavailable',
475 504: 'Gateway Timeout',
476 505: 'HTTP Version Not Supported',
477 506: 'Variant Also Negotiates',
478 507: 'Insufficient Storage',
479 508: 'Loop Detected',
481 511: 'Network Authentication Required'
483 return reasons
.get(code
, 'Unknown Status Code');
485 def ftp_status_to_reason(code
):
487 110: 'Restart marker reply',
488 120: 'Service ready in nnn minutes',
489 125: 'Data connection already open; transfer starting',
490 150: 'File status okay; about to open data connection',
492 202: 'Command not implemented, superfluous at this site',
493 211: 'System status, or system help reply',
494 212: 'Directory status',
497 215: 'NAME system type',
498 220: 'Service ready for new user',
499 221: 'Service closing control connection',
500 225: 'Data connection open; no transfer in progress',
501 226: 'Closing data connection',
502 227: 'Entering Passive Mode',
503 230: 'User logged in, proceed',
504 250: 'Requested file action okay, completed',
505 257: '"PATHNAME" created',
506 331: 'User name okay, need password',
507 332: 'Need account for login',
508 350: 'Requested file action pending further information',
509 421: 'Service not available, closing control connection',
510 425: 'Can\'t open data connection',
511 426: 'Connection closed; transfer aborted',
512 450: 'Requested file action not taken',
513 451: 'Requested action aborted. Local error in processing',
514 452: 'Requested action not taken. Insufficient storage space in system',
515 500: 'Syntax error, command unrecognized',
516 501: 'Syntax error in parameters or arguments',
517 502: 'Command not implemented',
518 503: 'Bad sequence of commands',
519 504: 'Command not implemented for that parameter',
520 530: 'Not logged in',
521 532: 'Need account for storing files',
522 550: 'Requested action not taken. File unavailable',
523 551: 'Requested action aborted. Page type unknown',
524 552: 'Requested file action aborted. Exceeded storage allocation',
525 553: 'Requested action not taken. File name not allowed'
527 return reasons
.get(code
, 'Unknown Status Code');
529 def sftp_status_to_reason(code
):
533 2: 'SSH_FX_NO_SUCH_FILE',
534 3: 'SSH_FX_PERMISSION_DENIED',
536 5: 'SSH_FX_BAD_MESSAGE',
537 6: 'SSH_FX_NO_CONNECTION',
538 7: 'SSH_FX_CONNECTION_LOST',
539 8: 'SSH_FX_OP_UNSUPPORTED'
541 return reasons
.get(code
, 'Unknown Status Code');
543 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
544 if isinstance(headers
, dict):
546 if(sys
.version
[0]=="2"):
547 for headkey
, headvalue
in headers
.iteritems():
548 returnval
.append((headkey
, headvalue
));
549 if(sys
.version
[0]>="3"):
550 for headkey
, headvalue
in headers
.items():
551 returnval
.append((headkey
, headvalue
));
552 elif isinstance(headers
, list):
558 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
559 if isinstance(headers
, dict):
561 if(sys
.version
[0]=="2"):
562 for headkey
, headvalue
in headers
.iteritems():
563 returnval
.append(headkey
+": "+headvalue
);
564 if(sys
.version
[0]>="3"):
565 for headkey
, headvalue
in headers
.items():
566 returnval
.append(headkey
+": "+headvalue
);
567 elif isinstance(headers
, list):
573 def make_http_headers_from_pycurl_to_dict(headers
):
575 headers
= headers
.strip().split('\r\n');
576 for header
in headers
:
577 parts
= header
.split(': ', 1)
580 header_dict
[key
.title()] = value
;
583 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
584 if isinstance(headers
, list):
589 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
591 elif isinstance(headers
, dict):
597 def get_httplib_support(checkvalue
=None):
598 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
600 returnval
.append("ftp");
601 returnval
.append("httplib");
603 returnval
.append("httplib2");
604 returnval
.append("urllib");
606 returnval
.append("urllib3");
607 returnval
.append("request3");
608 returnval
.append("request");
610 returnval
.append("requests");
612 returnval
.append("aiohttp");
614 returnval
.append("httpx");
615 returnval
.append("httpx2");
617 returnval
.append("mechanize");
619 returnval
.append("pycurl");
620 if(hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
621 returnval
.append("pycurl2");
622 if(hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
623 returnval
.append("pycurl3");
625 returnval
.append("sftp");
627 returnval
.append("pysftp");
628 if(not checkvalue
is None):
629 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
630 checkvalue
= "urllib";
631 if(checkvalue
=="httplib1"):
632 checkvalue
= "httplib";
633 if(checkvalue
in returnval
):
639 def check_httplib_support(checkvalue
="urllib"):
640 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
641 checkvalue
= "urllib";
642 if(checkvalue
=="httplib1"):
643 checkvalue
= "httplib";
644 returnval
= get_httplib_support(checkvalue
);
647 def get_httplib_support_list():
648 returnval
= get_httplib_support(None);
651 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
652 global geturls_download_sleep
, havezstd
, havebrotli
, haveaiohttp
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
654 sleep
= geturls_download_sleep
;
657 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
658 httplibuse
= "urllib";
659 if(httplibuse
=="httplib1"):
660 httplibuse
= "httplib";
661 if(not haverequests
and httplibuse
=="requests"):
662 httplibuse
= "urllib";
663 if(not haveaiohttp
and httplibuse
=="aiohttp"):
664 httplibuse
= "urllib";
665 if(not havehttpx
and httplibuse
=="httpx"):
666 httplibuse
= "urllib";
667 if(not havehttpx
and httplibuse
=="httpx2"):
668 httplibuse
= "urllib";
669 if(not havehttpcore
and httplibuse
=="httpcore"):
670 httplibuse
= "urllib";
671 if(not havehttpcore
and httplibuse
=="httpcore2"):
672 httplibuse
= "urllib";
673 if(not havemechanize
and httplibuse
=="mechanize"):
674 httplibuse
= "urllib";
675 if(not havepycurl
and httplibuse
=="pycurl"):
676 httplibuse
= "urllib";
677 if(not havepycurl
and httplibuse
=="pycurl2"):
678 httplibuse
= "urllib";
679 if(havepycurl
and httplibuse
=="pycurl2" and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
680 httplibuse
= "pycurl";
681 if(not havepycurl
and httplibuse
=="pycurl3"):
682 httplibuse
= "urllib";
683 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
684 httplibuse
= "pycurl2";
685 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
686 httplibuse
= "pycurl";
687 if(not havehttplib2
and httplibuse
=="httplib2"):
688 httplibuse
= "httplib";
689 if(not haveparamiko
and httplibuse
=="sftp"):
691 if(not havepysftp
and httplibuse
=="pysftp"):
693 if(httplibuse
=="urllib" or httplibuse
=="request"):
694 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
695 elif(httplibuse
=="request"):
696 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
697 elif(httplibuse
=="request3"):
698 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
699 elif(httplibuse
=="httplib"):
700 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
701 elif(httplibuse
=="httplib2"):
702 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
703 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
704 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
705 elif(httplibuse
=="requests"):
706 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
707 elif(httplibuse
=="aiohttp"):
708 returnval
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
709 elif(httplibuse
=="httpx"):
710 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
711 elif(httplibuse
=="httpx2"):
712 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
713 elif(httplibuse
=="httpcore"):
714 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
715 elif(httplibuse
=="httpcore2"):
716 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
717 elif(httplibuse
=="mechanize"):
718 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
719 elif(httplibuse
=="pycurl"):
720 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
721 elif(httplibuse
=="pycurl2"):
722 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
723 elif(httplibuse
=="pycurl3"):
724 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
725 elif(httplibuse
=="ftp"):
726 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
727 elif(httplibuse
=="sftp"):
728 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
729 elif(httplibuse
=="pysftp"):
730 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl to a temporary file using the HTTP backend named by
    httplibuse, transparently falling back to "urllib" (or the nearest
    supported backend) when the requested library is not installed.

    Returns the dict produced by the selected download_from_url_file_with_*
    helper, or False when the backend cannot be used at all (e.g. sftp/pysftp
    requested without paramiko/pysftp installed, or an unknown backend name).
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    # Negative sleep / non-positive timeout mean "use the module defaults".
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # Normalize legacy backend aliases first.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to an available backend when the requested one is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2")):
        httplibuse = "urllib"
    if(not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2")):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3")):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/2 / HTTP/3 requests to what the installed
    # pycurl build actually supports.
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    # sftp/pysftp have no HTTP fallback; fail outright when unsupported.
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    # BUG FIX: this guard previously tested haveparamiko instead of
    # havepysftp (compare the identical guard in download_from_url_to_file).
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # Dispatch table replaces the long if/elif chain; the old chain also
    # contained unreachable "request"/"request3" re-tests.
    httplib_dispatch = {
        "urllib": download_from_url_file_with_urllib,
        "request": download_from_url_file_with_request,
        "request3": download_from_url_file_with_request3,
        "httplib": download_from_url_file_with_httplib,
        "httplib2": download_from_url_file_with_httplib2,
        "urllib3": download_from_url_file_with_urllib3,
        "requests": download_from_url_file_with_requests,
        "aiohttp": download_from_url_file_with_aiohttp,
        "httpx": download_from_url_file_with_httpx,
        "httpx2": download_from_url_file_with_httpx2,
        "httpcore": download_from_url_file_with_httpcore,
        "httpcore2": download_from_url_file_with_httpcore2,
        "mechanize": download_from_url_file_with_mechanize,
        "pycurl": download_from_url_file_with_pycurl,
        "pycurl2": download_from_url_file_with_pycurl2,
        "pycurl3": download_from_url_file_with_pycurl3,
        "ftp": download_from_url_file_with_ftp,
        "sftp": download_from_url_file_with_sftp,
        "pysftp": download_from_url_file_with_pysftp,
    }
    handler = httplib_dispatch.get(httplibuse)
    if(handler is None):
        return False
    returnval = handler(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl to outpath/outfile (or return the content in memory when
    outfile is "-") using the HTTP backend named by httplibuse, falling back
    to "urllib" (or the nearest supported backend) when the requested library
    is not installed.

    Returns the dict produced by the selected download_from_url_to_file_with_*
    helper, or False when the backend cannot be used at all.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    # Negative sleep / non-positive timeout mean "use the module defaults".
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # Normalize legacy backend aliases first.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to an available backend when the requested one is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and (httplibuse == "httpx" or httplibuse == "httpx2")):
        httplibuse = "urllib"
    if(not havehttpcore and (httplibuse == "httpcore" or httplibuse == "httpcore2")):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3")):
        httplibuse = "urllib"
    # Downgrade pycurl HTTP/2 / HTTP/3 requests to what the installed
    # pycurl build actually supports.
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    # sftp/pysftp have no HTTP fallback; fail outright when unsupported.
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # Dispatch table replaces the long if/elif chain.
    # BUG FIX: the old httpx/httpx2/httpcore/httpcore2 branches called the
    # _to_file_ helpers WITHOUT outfile/outpath, shifting every following
    # argument into the wrong parameter; all backends now receive the same
    # uniform argument list.
    httplib_dispatch = {
        "urllib": download_from_url_to_file_with_urllib,
        "request": download_from_url_to_file_with_request,
        "request3": download_from_url_to_file_with_request3,
        "httplib": download_from_url_to_file_with_httplib,
        "httplib2": download_from_url_to_file_with_httplib2,
        "urllib3": download_from_url_to_file_with_urllib3,
        "requests": download_from_url_to_file_with_requests,
        "aiohttp": download_from_url_to_file_with_aiohttp,
        "httpx": download_from_url_to_file_with_httpx,
        "httpx2": download_from_url_to_file_with_httpx2,
        "httpcore": download_from_url_to_file_with_httpcore,
        "httpcore2": download_from_url_to_file_with_httpcore2,
        "mechanize": download_from_url_to_file_with_mechanize,
        "pycurl": download_from_url_to_file_with_pycurl,
        "pycurl2": download_from_url_to_file_with_pycurl2,
        "pycurl3": download_from_url_to_file_with_pycurl3,
        "ftp": download_from_url_to_file_with_ftp,
        "sftp": download_from_url_to_file_with_sftp,
        "pysftp": download_from_url_to_file_with_pysftp,
    }
    handler = httplib_dispatch.get(httplibuse)
    if(handler is None):
        return False
    returnval = handler(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl into memory using urllib/urllib2 and return a result
    dict ({'Type': "Content", 'Content': …, 'Headers': …, 'Code': …, …}),
    or False on connection failure. Basic-auth credentials embedded in the
    URL are converted to an Authorization header; gzip/deflate/br/zstd/
    lzma/bzip2 response bodies are decompressed when the matching codec
    module is available.
    """
    global geturls_download_sleep, havezstd, havebrotli
    # Negative sleep / non-positive timeout mean "use the module defaults".
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: previously called .update() on httpuseragent (a str)
            # instead of on the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: previously called .update() on httpuseragent
            # instead of on the httpheaders dict.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Convert URL-embedded credentials into a Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl)
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(geturls_request)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata)
        else:
            geturls_text = geturls_opener.open(geturls_request)
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry a readable body; keep going.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    try:
        httpcodereason = geturls_text.reason
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode())
    try:
        httpversionout = geturls_text.version
    except AttributeError:
        httpversionout = "1.1"
    httpmethodout = geturls_request.get_method()
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if(sys.version[0] == "2"):
        # Python 2 returns a message object; copy it into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the accumulated body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: previously caught zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUG FIX: previously caught zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl with urllib and spool the body into a uniquely named
    temporary file (kept on disk; delete=False). Returns a result dict
    ({'Type': "File", 'Filename': …, 'Filesize': …, 'DownloadTime': …, …}),
    or False when the underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    # Negative sleep / non-positive timeout mean "use the module defaults".
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best-effort: stamp the temp file with the server's Last-Modified.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime is missing (old Python) or the header
            # was absent; try a manual parse, then give up silently.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl with urllib to outpath/outfile; when outfile is "-" the
    content is returned in memory instead of being written to a named file.
    buffersize is a [download, copy] pair. Returns a 'File'- or 'Content'-
    typed result dict, or False on failure (bad paths, failed download).
    """
    global geturls_download_sleep, havezstd, havebrotli
    # Negative sleep / non-positive timeout mean "use the module defaults".
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Named-file branch: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best-effort: carry the server's Last-Modified over to the file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the old dict listed 'Method' twice; only the second
        # ('Method': httpmethod) survived, so that value is kept here.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # In-memory branch: download to a temp file, copy back into memory.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed (see note above).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """
    Download httpurl into memory using the low-level httplib/http.client
    HTTP(S)Connection API and return a result dict ({'Type': "Content",
    'Content': …, 'Headers': …, 'Code': …, …}), or False on failure or a
    non-http(s) URL scheme. Compressed bodies (gzip/deflate/br/zstd/lzma/
    bzip2) are decompressed when the matching codec module is available.
    """
    # NOTE: the old global statement listed havezstd/havebrotli twice.
    global geturls_download_sleep, havezstd, havebrotli
    # Negative sleep / non-positive timeout mean "use the module defaults".
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: previously called .update() on httpuseragent (a str)
            # instead of on the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: previously called .update() on httpuseragent
            # instead of on the httpheaders dict.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Convert URL-embedded credentials into a Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # BUG FIX: the POST branch previously issued the request with
            # the "GET" verb even though it attached the POST body.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: http.client reports the protocol version as an int (10/11);
    # the old string comparison =="10" never matched, so HTTP/1.0 was
    # always reported as "1.1". Both forms are accepted here.
    if(geturls_text.version == 10 or geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 returns a message object; copy it into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the accumulated body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: previously caught zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUG FIX: previously caught zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the httplib backend into a named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', 'Headers',
    'Code', ...) describing the temporary file, or False when the underlying
    download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # BUGFIX: this httplib wrapper previously delegated to
    # download_from_url_with_urllib; delegate to the httplib backend instead,
    # matching download_from_url_file_with_httplib2.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Py2 (or header missing): fall back to strptime parsing.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl (httplib backend) to outpath/outfile, or return the
    content in-memory when outfile is "-".

    Returns a result dict ('Type': "File" or "Content", ...), or False when
    the target path is unusable or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict literal listed 'Method' twice; the first entry
        # (pretmpfilename.get('Method')) was dead — keep the surviving value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        # Copy the temp file into memory in buffersize[1]-byte chunks,
        # logging progress as we go.
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via httplib2's *ConnectionWithTimeout classes.

    Streams the response into memory (with progress logging), transparently
    decompresses known Content-Encodings, and returns a result dict
    ('Type': "Content", 'Content', 'Headers', 'Code', ...), or False on
    connection errors or unsupported URL schemes.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no .update();
            # the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were being issued with the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except BlockingIOError:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    # BUGFIX: HTTPResponse.version is the int 10 or 11; comparing against the
    # string "10" was always false, mislabeling HTTP/1.0 responses as 1.1.
    if(geturls_text.version==10):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2: force header mapping into a plain dict keyed by header name.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Best-effort decompression of known Content-Encodings; on failure the
    # raw body is returned unchanged.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # BUGFIX: was `except zstandard.error` — wrong module for lzma data.
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # BUGFIX: was `except zstandard.error`; bz2 raises ValueError/OSError.
        except (ValueError, OSError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"};
    geturls_text.close();
    return returnval;
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUGFIX: ensure the delegated result is returned (the visible code
        # assigned returnval without returning it, yielding None).
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the httplib2 backend into a named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', 'Headers',
    'Code', ...) describing the temporary file, or False when the underlying
    download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: ensure the delegated result is returned (the visible code
        # assigned returnval without returning it, yielding None).
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl (httplib2 backend) to outpath/outfile, or return the
    content in-memory when outfile is "-".

    Returns a result dict ('Type': "File" or "Content", ...), or False when
    the target path is unusable or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict literal listed 'Method' twice; the first entry
        # (pretmpfilename.get('Method')) was dead — keep the surviving value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        # Copy the temp file into memory in buffersize[1]-byte chunks,
        # logging progress as we go.
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        # BUGFIX: arguments were passed as (..., postdata, buffersize, outfile,
        # outpath, sleep, timeout), which misaligns with the to_file signature
        # used elsewhere in this file (..., postdata, outfile, outpath, ranges,
        # buffersize, sleep, timeout) and silently dropped `ranges`.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """'request' backend alias: delegate directly to urllib."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # BUGFIX: ensure the delegated result is returned (the visible code
    # assigned returnval without returning it, yielding None).
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """'request' backend alias: delegate directly to urllib."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # BUGFIX: ensure the delegated result is returned (the visible code
    # assigned returnval without returning it, yielding None).
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """'request' backend alias: delegate directly to urllib."""
    # BUGFIX: arguments were passed as (..., postdata, buffersize, outfile,
    # outpath, sleep, timeout), which misaligns with the to_file signature
    # used elsewhere in this file (..., postdata, outfile, outpath, ranges,
    # buffersize, sleep, timeout) and silently dropped `ranges`.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the requests library (streaming).

    Streams the raw response into memory (with progress logging),
    transparently decompresses known Content-Encodings, and returns a result
    dict ('Type': "Content", 'Content', 'Headers', 'Code', ...), or False on
    connection errors.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no .update();
            # the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        reqsession = requests.Session();
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
        else:
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    # BUGFIX: was requests.exceptions.ConnectError, which does not exist in
    # the requests API (AttributeError at except-clause evaluation); the
    # correct class is ConnectionError.
    except requests.exceptions.ConnectionError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason;
    # BUGFIX: raw.version is the int 10 or 11; comparing against the string
    # "10" was always false, mislabeling HTTP/1.0 responses as 1.1.
    if(geturls_text.raw.version==10):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.url;
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2: force header mapping into a plain dict keyed by header name.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.raw.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Best-effort decompression of known Content-Encodings; on failure the
    # raw body is returned unchanged.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # BUGFIX: was `except zstandard.error` — wrong module for lzma data.
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # BUGFIX: was `except zstandard.error`; bz2 raises ValueError/OSError.
        except (ValueError, OSError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"};
    geturls_text.close();
    return returnval;
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when requests is unavailable: delegate to urllib."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUGFIX: ensure the delegated result is returned (the visible code
        # assigned returnval without returning it, yielding None).
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the requests-based downloader and spool the body
    into a uniquely named temporary file.

    Returns a dict describing the temp file ('Type': "File", 'Filename',
    'Filesize', response headers and timing info) or False on failure.

    Fixes vs. original: httpheaders/httpuseragent/httpreferer/httpcookie now
    carry the same defaults as every sibling download_from_url_file_with_*
    definition, and ranges no longer uses a shared mutable list default.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    if(ranges is None):
        ranges = [None, None]
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # NOTE(review): start - end is negative; hms_string presumably normalizes
    # the sign -- kept as-is to match every other downloader in this file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """requests is not installed; hand the call straight to the
        urllib-based file downloader with the same arguments."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                  httpreferer, httpcookie, httpmethod,
                                                  postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Download httpurl via requests and either move the result into
    outpath/outfile (returns a 'File' dict) or, when outfile == "-", return
    the body in memory (a 'Content' dict).  Returns False on failure.

    Fixes vs. original: the returnval dicts listed the 'Method' key twice
    (only the later httpmethod entry survived, so that value is kept);
    ranges/buffersize no longer use shared mutable list defaults.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified stamp on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when requests is missing: delegate to the urllib version.

        Bug fix: the original forwarded arguments positionally in the order
        (postdata, buffersize, outfile, outpath, sleep, timeout), which binds
        buffersize to the callee's outfile parameter (shifting every later
        argument) and silently drops ranges.  Forward by keyword so each value
        lands on the parameter of the same name.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with aiohttp and return a 'Content' result dict (body,
    size, headers, status, 'HTTPLib': "aiohttp") or False on connection error.

    Bug fixes vs. original:
    - The User-Agent/Referer fallback branches called httpuseragent.update(...)
      -- a str, which has no update(); the header dict is the intended target.
    - The lzma and bzip2 decompress handlers caught zstandard.error; they now
      catch the exceptions those codecs actually raise.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        # Fold inline URL credentials into an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # NOTE(review): synchronous use of aiohttp's async API, kept as-is to
        # preserve this module's existing behavior.
        reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize)
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata)
        else:
            geturls_text = reqsession.get(httpurl)
    # NOTE(review): aiohttp exposes no "exceptions" submodule; these clauses
    # match the original source and are only evaluated when an error occurs.
    except aiohttp.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except aiohttp.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    httpversionout = geturls_text.version
    httpmethodout = geturls_text.method
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request_info.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: flatten the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; lzma raises LZMAError.
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"}
    geturls_text.close()
    return returnval
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """aiohttp is unavailable; provide the same contract via urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent,
                                             httpreferer, httpcookie, httpmethod,
                                             postdata, buffersize, sleep, timeout)
def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the aiohttp-based downloader and spool the body
    into a uniquely named temporary file.

    Returns a dict describing the temp file ('Type': "File", 'Filename',
    'Filesize', response headers and timing info) or False on failure.

    Fixes vs. original: httpheaders/httpuseragent/httpreferer/httpcookie now
    carry the same defaults as every sibling download_from_url_file_with_*
    definition, and ranges no longer uses a shared mutable list default.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    if(ranges is None):
        ranges = [None, None]
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """aiohttp is not installed; hand the call straight to the
        urllib-based file downloader with the same arguments."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                  httpreferer, httpcookie, httpmethod,
                                                  postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Download httpurl via aiohttp and either move the result into
    outpath/outfile (returns a 'File' dict) or, when outfile == "-", return
    the body in memory (a 'Content' dict).  Returns False on failure.

    Fixes vs. original: the returnval dicts listed the 'Method' key twice
    (only the later httpmethod entry survived, so that value is kept);
    ranges/buffersize no longer use shared mutable list defaults.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(ranges is None):
        ranges = [None, None]
    if(buffersize is None):
        buffersize = [524288, 524288]
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified stamp on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when aiohttp is missing: delegate to the urllib version.

        Bug fix: the original forwarded arguments positionally in the order
        (postdata, buffersize, outfile, outpath, sleep, timeout), which binds
        buffersize to the callee's outfile parameter (shifting every later
        argument) and silently drops ranges.  Forward by keyword so each value
        lands on the parameter of the same name.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpx (HTTP/1.1 client) and return a 'Content'
    result dict (body, size, headers, status, 'HTTPLib': "httpx") or False
    on connection error.

    Bug fixes vs. original:
    - The User-Agent/Referer fallback branches called httpuseragent.update(...)
      -- a str, which has no update(); the header dict is the intended target.
    - The lzma and bzip2 decompress handlers caught zstandard.error; they now
      catch the exceptions those codecs actually raise.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        # Fold inline URL credentials into an HTTP Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        # Older httpx releases lack reason_phrase; derive it from the code.
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: flatten the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; lzma raises LZMAError.
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"}
    # Second close matches the original; closing an already-closed response
    # is a no-op in httpx.
    geturls_text.close()
    return returnval
if(not havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """httpx is unavailable; provide the same contract via urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent,
                                             httpreferer, httpcookie, httpmethod,
                                             postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the httpx-based downloader and spool the body
    into a uniquely named temporary file.

    Returns a dict describing the temp file ('Type': "File", 'Filename',
    'Filesize', response headers and timing info) or False on failure.

    Fixes vs. original: httpheaders/httpuseragent/httpreferer/httpcookie now
    carry the same defaults as every sibling download_from_url_file_with_*
    definition, and ranges no longer uses a shared mutable list default.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    if(ranges is None):
        ranges = [None, None]
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """httpx is not installed; hand the call straight to the urllib-based
        file downloader with the same arguments."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                  httpreferer, httpcookie, httpmethod,
                                                  postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with httpx and deliver the result as a file or as bytes.

    When outfile != "-", the body is fetched to a temp file and moved to
    outpath/outfile; the result dict has Type "File". When outfile == "-",
    the temp file is copied into memory and returned with Type "Content".
    Returns False when the target path is unusable or the fetch failed.

    BUGFIX: the result dicts previously contained the key 'Method' twice
    ('Method': pretmpfilename.get('Method') then 'Method': httpmethod); in a
    dict literal the later entry silently wins, so the duplicate is removed
    and the surviving value (httpmethod) kept.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    returnval = False
    if(not outfile == "-"):
        # Resolve and validate the destination path.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Mirror the server's Last-Modified header onto the moved file;
        # fall back to strptime when parsedate_to_datetime is unavailable (py2).
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # NOTE(review): timings are computed as start - end (negative) throughout
        # this file; kept for consistency with the sibling implementations.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Stream the temp file into memory and return it as Content.
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation.

    BUGFIX: the original passed the tail arguments positionally as
    (postdata, buffersize, outfile, outpath, sleep, timeout), which drops
    `ranges` and lands `buffersize` in the `outfile` slot of the target's
    (..., postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    signature. Passing them by keyword makes the call unambiguous.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpx Client (HTTP/1.1 + HTTP/2) and return a
    result dict ('Type' "Content" plus headers/status metadata, 'HTTPLib'
    "httpx2"), or False on connection errors.

    BUGFIXES:
    - User-Agent/Referer were added to `httpuseragent` (a string) instead of
      `httpheaders` when the header was not already present.
    - decompression error handling caught `zstandard.error` (which does not
      exist; the package raises ZstdError) even for lzma and bz2 payloads;
      each codec now catches its own error type.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # Unknown methods fall back to GET, matching the siblings.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # py2: copy the header mapping key by key into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # NOTE(review): iter_content mirrors the original call; confirm it
        # matches the httpx version in use (newer httpx names this iter_bytes).
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode the declared Content-Encoding; failures leave the
    # raw bytes untouched.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"}
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx (HTTP/2 mode) is unavailable: delegate the
    fetch to the urllib-based implementation with identical arguments."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via download_from_url_with_httpx2 and spool the body into
    a uniquely named temporary file.

    Returns the fetch's result dict with 'Type' "File", 'Filename' set to the
    temp file, and 'Filesize'/'DownloadTime' entries refreshed; False when the
    underlying fetch failed.

    FIXES:
    - httpheaders/httpuseragent/httpreferer/httpcookie now default like every
      sibling download_from_url_file_with_* function (they were positional-only).
    - the Last-Modified timestamp is applied *after* the content is written;
      the original set it first, so the subsequent write reset the mtime.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        f.write(pretmpfilename.get('Content'))
    # Mirror the server's Last-Modified header onto the temp file (after the
    # write, so it is not clobbered); py2 lacks parsedate_to_datetime.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
    except AttributeError:
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
        except ValueError:
            pass
    except ValueError:
        pass
    returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx (HTTP/2 mode) is unavailable: delegate the
    file download to the urllib-based implementation unchanged."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with httpx (HTTP/2 mode) and deliver the result as a
    file (outfile != "-") or as in-memory bytes (outfile == "-").

    Returns a result dict with Type "File" or "Content", or False when the
    destination path is unusable or the fetch failed.

    BUGFIX: the result dicts contained a duplicate 'Method' key; the later
    entry ('Method': httpmethod) silently won, so the duplicate is removed
    and that value kept.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    returnval = False
    if(not outfile == "-"):
        # Resolve and validate the destination path.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Mirror the server's Last-Modified header onto the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Stream the temp file into memory and return it as Content.
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx (HTTP/2 mode) is unavailable: delegate to the
    urllib implementation.

    BUGFIX: the tail arguments were passed positionally as (postdata,
    buffersize, outfile, outpath, sleep, timeout), dropping `ranges` and
    putting `buffersize` in the `outfile` slot; they are now keyword-passed.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpcore ConnectionPool (HTTP/1.1 only) and
    return a result dict ('Type' "Content", 'HTTPLib' "httpcore"), or False
    on connection errors.

    BUGFIXES:
    - the POST branch issued .request("GET", ...); it now sends "POST".
    - User-Agent/Referer were added to `httpuseragent` (a string) instead of
      `httpheaders` when the header was not already present.
    - decompression error handling caught the nonexistent `zstandard.error`
      even for lzma/bz2 payloads; each codec now catches its own error type.
    NOTE(review): `timeout` and `httpcookie` are accepted but not forwarded to
    httpcore requests, matching the original behavior — confirm intent.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod=="POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to GET, matching the siblings.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    # httpcore exposes no negotiated-version attribute here; HTTP/2 is disabled.
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # py2: copy the header mapping key by key into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # NOTE(review): iter_content mirrors the original call; confirm the
        # installed httpcore response type actually provides it.
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decode the declared Content-Encoding; failures leave the
    # raw bytes untouched.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"}
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate the fetch to
        the urllib-based implementation with identical arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via download_from_url_with_httpcore and spool the body
    into a uniquely named temporary file.

    Returns the fetch's result dict with 'Type' "File", 'Filename' set to the
    temp file, and 'Filesize'/'DownloadTime' entries refreshed; False when the
    underlying fetch failed.

    FIXES:
    - httpheaders/httpuseragent/httpreferer/httpcookie now carry the same
      defaults as the not-havehttpcore fallback of the same name (they were
      positional-only here).
    - the Last-Modified timestamp is applied *after* the content is written;
      the original set it first, so the subsequent write reset the mtime.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        f.write(pretmpfilename.get('Content'))
    # Mirror the server's Last-Modified header onto the temp file (after the
    # write, so it is not clobbered); py2 lacks parsedate_to_datetime.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
    except AttributeError:
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
        except ValueError:
            pass
    except ValueError:
        pass
    returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate the file
        download to the urllib-based implementation unchanged."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
2873 def download_from_url_to_file_with_httpcore(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
2874 global geturls_download_sleep
, havezstd
, havebrotli
;
2876 sleep
= geturls_download_sleep
;
2879 if(not outfile
=="-"):
2880 outpath
= outpath
.rstrip(os
.path
.sep
);
2881 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
2882 if(not os
.path
.exists(outpath
)):
2883 os
.makedirs(outpath
);
2884 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
2886 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
2888 pretmpfilename
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2889 if(not pretmpfilename
):
2891 tmpfilename
= pretmpfilename
.get('Filename');
2892 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2894 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
2895 exec_time_start
= time
.time();
2896 shutil
.move(tmpfilename
, filepath
);
2898 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
2899 except AttributeError:
2901 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
2906 exec_time_end
= time
.time();
2907 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
2908 if(os
.path
.exists(tmpfilename
)):
2909 os
.remove(tmpfilename
);
2910 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
2912 pretmpfilename
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
2913 tmpfilename
= pretmpfilename
.get('Filename');
2914 downloadsize
= int(os
.path
.getsize(tmpfilename
));
2917 exec_time_start
= time
.time();
2918 with
open(tmpfilename
, 'rb') as ft
:
2921 databytes
= ft
.read(buffersize
[1]);
2922 if not databytes
: break;
2923 datasize
= len(databytes
);
2924 fulldatasize
= datasize
+ fulldatasize
;
2927 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
2928 downloaddiff
= fulldatasize
- prevdownsize
;
2929 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
2930 prevdownsize
= fulldatasize
;
2933 fdata
= f
.getvalue();
2936 os
.remove(tmpfilename
);
2937 exec_time_end
= time
.time();
2938 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
2939 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
# Fallback: when the httpcore package is unavailable, redefine the
# httpcore-backed to-file downloader to route through the plain urllib
# implementation (same signature, same return shape).
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # Delegate unchanged to the urllib variant; `ranges` is accepted for
        # signature compatibility but not forwarded here.
        # NOTE(review): a trailing `return returnval` appears to have been
        # lost in this copy of the file - confirm against upstream.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpcore (HTTP/1 + HTTP/2 connection pool).

    Returns a dict with keys Type/Content/Contentsize/ContentsizeAlt/
    Headers/Version/Method/HeadersSent/URL/Code/Reason/HTTPLib, or False on
    connection errors/timeouts.  Basic-auth credentials embedded in the URL
    are converted to an Authorization header; gzip/deflate/br/zstd/lzma/bzip2
    content encodings are transparently decompressed when the matching
    libraries are available.

    Fixes vs. the previous revision: the POST branch now actually sends
    "POST" (it sent "GET"); the User-Agent/Referer fallbacks update
    `httpheaders` instead of calling .update() on the user-agent string;
    the buffer is rewound before reading it back; lzma/bz2 failures are
    caught with their own exception types instead of `zstandard.error`.
    Structural lines (`try:`, `else:`, `return`) lost in this mangled copy
    were restored following the file's repeated pattern.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # FIX: was `httpuseragent.update(...)`, which fails on a str.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # FIX: was `httpuseragent.update(...)` here as well.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # URL-embedded credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    time.sleep(sleep);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        elif(httpmethod=="POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            # FIX: the POST branch previously issued a "GET" request.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
        else:
            # Unknown method: fall back to a plain GET.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpcodereason = http_status_to_reason(geturls_text.status);
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2 header objects lack full dict semantics; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # NOTE(review): httpcore responses do not normally expose
        # iter_content; confirm the response type against the installed
        # httpcore version.
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # FIX: rewind before reading back; read() at EOF returns b"".
        strbuf.seek(0);
        returnval_content = strbuf.read();
    geturls_text.close();
    # Transparent decompression keyed on Content-Encoding; failures leave
    # the raw bytes untouched (best-effort by design).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # FIX: was `except zstandard.error` - wrong module for lzma failures.
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # FIX: was `except zstandard.error`; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"};
    return returnval;
# Fallback: without httpcore, the "httpcore2" downloader delegates to the
# plain urllib implementation (same signature, same return shape).
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): a trailing `return returnval` appears to have been
        # lost in this copy of the file - confirm against upstream.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl (via download_from_url_with_httpcore2) into a
    # uniquely-named temporary file and return a metadata dict about it.
    # The temp-file suffix embeds a SHA-1 of url/buffersize/start time so
    # concurrent downloads never collide.
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        # Py2: hashlib accepts str directly.
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        # Py3: hashlib requires bytes.
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # NOTE(review): an `if(sleep<0):` guard above this assignment appears to
    # have been lost in this copy of the file.
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # NOTE(review): the body of this guard (likely `return False;`) was lost
    # in this copy of the file.
    if(not pretmpfilename):
    # delete=False: the caller is responsible for removing the temp file.
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Stamp the temp file with the server's Last-Modified time.
        # NOTE(review): the `try:` opening this handler was lost in this copy.
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fallback for interpreters where parsedate_to_datetime is absent:
            # parse the RFC-1123 date by hand.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # Refresh size fields from the on-disk file and record elapsed time.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # NOTE(review): trailing `return returnval;` appears lost in this copy.
# Fallback: without httpcore, the "httpcore2" file downloader delegates to
# the urllib implementation (same signature, same return shape).
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): a trailing `return returnval` appears to have been
        # lost in this copy of the file - confirm against upstream.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # Download httpurl via the httpcore2 backend and either move the temp
    # file to outpath/outfile (outfile != "-") or return the bytes in memory
    # (outfile == "-").  buffersize is [download_chunk, copy_chunk].
    global geturls_download_sleep, havezstd, havebrotli;
    # NOTE(review): an `if(sleep<0):` guard above this assignment appears to
    # have been lost in this copy of the file.
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # On-disk branch: normalize the destination and ensure it is usable.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # NOTE(review): the bodies of the next two sanity guards (likely
        # `return False;`) were lost in this copy of the file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # NOTE(review): the body of this guard (likely `return False;`) was
        # lost in this copy of the file.
        if(not pretmpfilename):
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified time on the moved file.
        # NOTE(review): the `try:` opening this handler was lost in this copy.
        os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fallback: parse the RFC-1123 date by hand.
            os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): 'Method' appears twice in this dict literal - the
        # second entry (httpmethod) silently overwrites
        # pretmpfilename.get('Method').  Likely one of them is unintended.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # NOTE(review): the in-memory branch header (likely `if(outfile=="-"):`)
    # was lost in this copy of the file.
    pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
    tmpfilename = pretmpfilename.get('Filename');
    downloadsize = int(os.path.getsize(tmpfilename));
    # NOTE(review): initializers (likely `fulldatasize = 0; prevdownsize = 0;`)
    # appear lost here.
    exec_time_start = time.time();
    with open(tmpfilename, 'rb') as ft:
        # NOTE(review): an inner `with BytesIO() as f:` and the `while True:`
        # loop header appear lost here; `f` below is that BytesIO buffer.
        databytes = ft.read(buffersize[1]);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        fdata = f.getvalue();
    os.remove(tmpfilename);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
    # NOTE(review): duplicate 'Method' key here as well (see above).
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # NOTE(review): trailing `return returnval;` appears lost in this copy.
# Fallback redefinition of the httpcore2 to-file downloader.
# NOTE(review): the guard line above this def (likely `if(not havehttpcore):`)
# appears to have been lost in this copy of the file.
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # Delegate to the urllib variant; `ranges` is accepted but not forwarded.
    # NOTE(review): trailing `return returnval` appears lost in this copy.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
# "request3" is an alias family that routes through the urllib3 backend.
# NOTE(review): the guard line above this def (likely `if(haveurllib3):`)
# appears to have been lost in this copy of the file.
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    # NOTE(review): trailing `return returnval` appears lost in this copy.
    returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
# Fallback: without urllib3, "request3" delegates to plain urllib.
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): trailing `return returnval` appears lost in this copy.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
# "request3" file downloader: routes through the urllib3 backend.
# NOTE(review): the guard line above this def (likely `if(haveurllib3):`)
# appears to have been lost in this copy of the file.
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # NOTE(review): trailing `return returnval` appears lost in this copy.
    returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
# Fallback: without urllib3, the "request3" file downloader uses urllib.
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): trailing `return returnval` appears lost in this copy.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
# "request3" to-file downloader: routes through the urllib3 backend.
# NOTE(review): the guard line above this def (likely `if(haveurllib3):`)
# appears to have been lost in this copy of the file.
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # Delegate; `ranges` is accepted but not forwarded here.
    # NOTE(review): trailing `return returnval` appears lost in this copy.
    returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
# Fallback: without urllib3, the "request3" to-file downloader uses urllib.
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # NOTE(review): trailing `return returnval` appears lost in this copy.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib3 (PoolManager, streamed response).

    Returns a dict with keys Type/Content/Contentsize/ContentsizeAlt/
    Headers/Version/Method/HeadersSent/URL/Code/Reason/HTTPLib, or False on
    connection errors/timeouts.  URL-embedded credentials become a Basic
    Authorization header; gzip/deflate/br/zstd/lzma/bzip2 encodings are
    transparently decompressed when the matching libraries are available.

    Fixes vs. the previous revision: User-Agent/Referer fallbacks update
    `httpheaders` instead of calling .update() on the user-agent string;
    the HTTP-version check also accepts the int 10 that urllib3 actually
    reports (the old string compare never matched); the buffer is rewound
    before reading it back; lzma/bz2 failures use their own exception types
    instead of `zstandard.error`.  Structural lines lost in this mangled
    copy were restored following the file's repeated pattern.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # FIX: was `httpuseragent.update(...)`, which fails on a str.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # FIX: was `httpuseragent.update(...)` here as well.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    # Same connect/read timeout for the whole pool.
    timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    time.sleep(sleep);
    try:
        # preload_content=False keeps the response streamed so the loop
        # below can read it in buffersize chunks.
        if(httpmethod=="GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        elif(httpmethod=="POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
        else:
            # Unknown method: fall back to a plain GET.
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except Exception:
        # NOTE(review): the exception type of this final handler was lost in
        # this copy of the file; Exception preserves the log-and-bail shape.
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    # FIX: urllib3 reports the version as the int 10/11; the old string-only
    # compare ("10") never matched, mislabeling HTTP/1.0 responses as 1.1.
    if(geturls_text.version=="10" or geturls_text.version==10):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2 header objects lack full dict semantics; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # FIX: rewind before reading back; read() at EOF returns b"".
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparent decompression keyed on Content-Encoding; failures leave
    # the raw bytes untouched (best-effort by design).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        # FIX: was `except zstandard.error` - wrong module for lzma failures.
        except lzma.LZMAError:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        # FIX: was `except zstandard.error`; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"};
    geturls_text.close();
    return returnval;
# Fallback: without urllib3, the urllib3 downloader delegates to urllib.
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): trailing `return returnval` appears lost in this copy.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl (via download_from_url_with_urllib3) into a
    # uniquely-named temporary file and return a metadata dict about it.
    # The temp-file suffix embeds a SHA-1 of url/buffersize/start time so
    # concurrent downloads never collide.
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        # Py2: hashlib accepts str directly.
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        # Py3: hashlib requires bytes.
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # NOTE(review): an `if(sleep<0):` guard above this assignment appears to
    # have been lost in this copy of the file.
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # NOTE(review): the body of this guard (likely `return False;`) was lost
    # in this copy of the file.
    if(not pretmpfilename):
    # delete=False: the caller is responsible for removing the temp file.
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Stamp the temp file with the server's Last-Modified time.
        # NOTE(review): the `try:` opening this handler was lost in this copy.
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fallback for interpreters where parsedate_to_datetime is absent:
            # parse the RFC-1123 date by hand.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # Refresh size fields from the on-disk file and record elapsed time.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # NOTE(review): trailing `return returnval;` appears lost in this copy.
# Fallback: without urllib3, the urllib3 file downloader uses urllib.
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): trailing `return returnval` appears lost in this copy.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
3420 def download_from_url_to_file_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3421 global geturls_download_sleep
, havezstd
, havebrotli
;
3423 sleep
= geturls_download_sleep
;
3426 if(not outfile
=="-"):
3427 outpath
= outpath
.rstrip(os
.path
.sep
);
3428 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
3429 if(not os
.path
.exists(outpath
)):
3430 os
.makedirs(outpath
);
3431 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
3433 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
3435 pretmpfilename
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3436 if(not pretmpfilename
):
3438 tmpfilename
= pretmpfilename
.get('Filename');
3439 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3441 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
3442 exec_time_start
= time
.time();
3443 shutil
.move(tmpfilename
, filepath
);
3445 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3446 except AttributeError:
3448 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3453 exec_time_end
= time
.time();
3454 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
3455 if(os
.path
.exists(tmpfilename
)):
3456 os
.remove(tmpfilename
);
3457 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3459 pretmpfilename
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3460 tmpfilename
= pretmpfilename
.get('Filename');
3461 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3464 exec_time_start
= time
.time();
3465 with
open(tmpfilename
, 'rb') as ft
:
3468 databytes
= ft
.read(buffersize
[1]);
3469 if not databytes
: break;
3470 datasize
= len(databytes
);
3471 fulldatasize
= datasize
+ fulldatasize
;
3474 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3475 downloaddiff
= fulldatasize
- prevdownsize
;
3476 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3477 prevdownsize
= fulldatasize
;
3480 fdata
= f
.getvalue();
3483 os
.remove(tmpfilename
);
3484 exec_time_end
= time
.time();
3485 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
3486 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the
        plain-urllib implementation with an identical signature."""
        # BUG FIX: the original passed buffersize into the outfile slot and
        # dropped ranges entirely, shifting every later positional argument.
        returnval = download_from_url_to_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, outfile, outpath, ranges, buffersize,
            sleep, timeout)
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via mechanize.Browser and return a result dict with keys
    Type/Content/Contentsize/ContentsizeAlt/Headers/Version/Method/
    HeadersSent/URL/Code/Reason/HTTPLib, or False on a connection error."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...), which would
            # raise AttributeError on a plain string user agent.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-receiver bug as the User-Agent branch.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Embedded credentials become an HTTP Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            # GET and any unknown method fall back to a plain open().
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # An HTTPError is still a usable response (code/headers/body).
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpcodereason = geturls_text.msg
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    reqhead = geturls_opener.request
    httpheadersentout = reqhead.header_items()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 message objects are not real dicts; copy them into one.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):  # avoid ZeroDivisionError without Content-Length
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently undo any Content-Encoding the server applied; on a
    # decode failure the raw body is kept (best-effort, as elsewhere).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: original caught zstandard.error here
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUG FIX: original caught zstandard.error here
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is not installed: delegate to the
        urllib implementation, which shares the same signature."""
        returnval = download_from_url_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via mechanize into a uniquely named temporary file.

    Returns a dict describing the file (Type/Filename/Filesize/Headers/...)
    or False when the underlying download failed."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # The hash of URL + buffersize + start time makes the tempfile suffix
    # unique per call so parallel downloads never collide.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified time onto the temp file.
        lastmod = pretmpfilename.get('Headers').get('Last-Modified')
        try:
            modstamp = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (modstamp, modstamp))
        except (AttributeError, TypeError):
            try:
                modstamp = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (modstamp, modstamp))
            except (TypeError, ValueError):
                pass  # header absent or unparseable; keep current mtime
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    tmpsize = os.path.getsize(tmpfilename)
    returnval.update({'Filesize': tmpsize, 'FilesizeAlt': {'IEC': get_readable_size(tmpsize, 2, "IEC"), 'SI': get_readable_size(tmpsize, 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is not installed: delegate to the
        urllib file-download implementation (same signature)."""
        returnval = download_from_url_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via mechanize to outpath/outfile, or — when outfile
    is "-" — return the content in memory.

    Returns a dict (Type "File" or "Content") or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # destination directory is actually a file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # destination file is actually a directory
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Re-apply the server's Last-Modified time after the move.
        lastmod = pretmpfilename.get('Headers').get('Last-Modified')
        try:
            modstamp = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(filepath, (modstamp, modstamp))
        except (AttributeError, TypeError):
            try:
                modstamp = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(filepath, (modstamp, modstamp))
            except (TypeError, ValueError):
                pass  # header absent or unparseable
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: original dict listed 'Method' twice; keep the actual method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):  # avoid ZeroDivisionError on empty files
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: original had a duplicate 'Method' key and set
        # 'HeadersSent' to the literal list ['HeadersSent'].
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when mechanize is not installed: delegate to the
        urllib to-file implementation with an identical signature."""
        # BUG FIX: the original passed buffersize into the outfile slot and
        # dropped ranges, shifting every later positional argument.
        returnval = download_from_url_to_file_with_urllib(
            httpurl, httpheaders, httpuseragent, httpreferer, httpcookie,
            httpmethod, postdata, outfile, outpath, ranges, buffersize,
            sleep, timeout)
        return returnval
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via pycurl and return a result dict with keys
    Type/Content/Contentsize/ContentsizeAlt/Headers/Version/Method/
    HeadersSent/URL/Code/Reason/HTTPLib, or False on a connection error."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...), which would
            # raise AttributeError on a plain string user agent.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-receiver bug as the User-Agent branch.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Embedded credentials become an HTTP Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    retrieved_body = BytesIO()
    retrieved_headers = BytesIO()

    def _new_curl_handle():
        # Options shared by every request method.
        handle = pycurl.Curl()
        handle.setopt(handle.URL, httpurl)
        handle.setopt(handle.WRITEFUNCTION, retrieved_body.write)
        handle.setopt(handle.HTTPHEADER, httpheaders)
        handle.setopt(handle.HEADERFUNCTION, retrieved_headers.write)
        handle.setopt(handle.FOLLOWLOCATION, True)
        handle.setopt(handle.TIMEOUT, timeout)
        return handle

    try:
        geturls_text = _new_curl_handle()
        if(httpmethod == "POST"):
            geturls_text.setopt(geturls_text.POST, True)
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
        geturls_text.perform()
        retrieved_headers.seek(0)
        if(sys.version[0] == "2"):
            pycurlhead = retrieved_headers.read()
        if(sys.version[0] >= "3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8')
        # First header line looks like "HTTP/1.1 200 OK".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
        retrieved_body.seek(0)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
    httpversionout = pyhttpverinfo[0]
    httpmethodout = httpmethod
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
    httpheaderout = pycurlheadersout
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
    if(sys.version[0] == "2"):
        # Python 2 message objects are not real dicts; copy them into one.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):  # avoid ZeroDivisionError without Content-Length
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently undo any Content-Encoding the server applied; on a
    # decode failure the raw body is kept (best-effort, as elsewhere).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUG FIX: original caught zstandard.error here
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUG FIX: original caught zstandard.error here
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"}
    geturls_text.close()
    return returnval
3918 def download_from_url_with_pycurl(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3919 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
)
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via pycurl into a uniquely named temporary file.

    Returns a dict describing the file (Type/Filename/Filesize/Headers/...)
    or False when the underlying download failed."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # The hash of URL + buffersize + start time makes the tempfile suffix
    # unique per call so parallel downloads never collide.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified time onto the temp file.
        lastmod = pretmpfilename.get('Headers').get('Last-Modified')
        try:
            modstamp = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (modstamp, modstamp))
        except (AttributeError, TypeError):
            try:
                modstamp = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (modstamp, modstamp))
            except (TypeError, ValueError):
                pass  # header absent or unparseable; keep current mtime
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    tmpsize = os.path.getsize(tmpfilename)
    returnval.update({'Filesize': tmpsize, 'FilesizeAlt': {'IEC': get_readable_size(tmpsize, 2, "IEC"), 'SI': get_readable_size(tmpsize, 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
3963 def download_from_url_file_with_pycurl(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
3964 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
, sleep
, timeout
)
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via pycurl to outpath/outfile, or — when outfile is
    "-" — return the content in memory.

    Returns a dict (Type "File" or "Content") or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # destination directory is actually a file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # destination file is actually a directory
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Re-apply the server's Last-Modified time after the move.
        lastmod = pretmpfilename.get('Headers').get('Last-Modified')
        try:
            modstamp = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(filepath, (modstamp, modstamp))
        except (AttributeError, TypeError):
            try:
                modstamp = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(filepath, (modstamp, modstamp))
            except (TypeError, ValueError):
                pass  # header absent or unparseable
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: original dict listed 'Method' twice; keep the actual method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):  # avoid ZeroDivisionError on empty files
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: original dict listed 'Method' twice; keep the actual method.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback: delegate to the urllib to-file implementation when pycurl is unavailable.

    BUGFIX: arguments are now forwarded by keyword. The old positional call
    passed buffersize where the sibling to-file signatures (see the pycurl
    variant above) expect outfile, shifting every later argument by one and
    dropping ranges entirely.
    NOTE(review): assumes the urllib variant uses the same parameter names as
    its visible siblings — confirm against its definition.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/2 and return the response.

        Returns a dict with 'Type': "Content", the (decoded) body under
        'Content', plus metadata ('Headers', 'Version', 'Method',
        'HeadersSent', 'URL', 'Code', 'Reason') and 'HTTPLib': "pycurl2".
        Returns False on socket timeout/DNS failure or header-parse errors.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUGFIX: was httpuseragent.update({...}); a user-agent string
                # has no .update() — the header dict is the intended target.
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUGFIX: was httpuseragent.update({...}); see above.
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline userinfo in the URL becomes a Basic Authorization header.
            if(sys.version[0] == "2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0] >= "3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            # The GET, POST and default branches only differed in the two
            # POST-specific options, so the Curl setup is shared here.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            if(httpmethod == "POST"):
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0] == "2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0] >= "3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0] == "2"):
            # Python 2 header objects need rebuilding into a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently undo any Content-Encoding the server applied;
        # decode failures leave the raw bytes (best-effort, as before).
        if(httpheaderout.get("Content-Encoding") == "gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
            try:
                # BUGFIX: zstandard raises ZstdError; "zstandard.error" does
                # not exist and would raise AttributeError on failure.
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.ZstdError:
                pass
        elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
            try:
                # BUGFIX: was "except zstandard.error" around an lzma call.
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:
                pass
        elif(httpheaderout.get("Content-Encoding") == "bzip2"):
            try:
                # BUGFIX: was "except zstandard.error"; bz2.decompress raises
                # OSError/ValueError on malformed input.
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback: delegate to the urllib implementation (pycurl unavailable).

    Ensures the delegate's result dict (or False) is propagated to the caller.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback: pycurl is present but lacks HTTP/2 support.

        CONSISTENCY FIX: delegate to the plain pycurl implementation, as the
        file-variant (orig 4266) and pycurl3 (orig 4528) fallbacks do, instead
        of dropping all the way down to urllib.
        """
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_pycurl2 into a uniquely
        named temporary file.

        Returns a 'Type': "File" result dict pointing at the temp file (the
        caller is responsible for removing it), or False if the download
        failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Unique temp-file suffix derived from the URL, buffer size and time.
        myhash = hashlib.new("sha1")
        if(sys.version[0] == "2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0] >= "3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # Stamp the file with the server's Last-Modified time; the py2
            # path (no parsedate_to_datetime) falls back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            # NOTE(review): writing after os.utime resets the mtime just set
            # above — kept as-is to match the file-wide convention.
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback: delegate to the urllib file implementation (pycurl unavailable).

    Ensures the delegate's result dict (or False) is propagated to the caller.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback: pycurl is present but lacks HTTP/2; use the plain pycurl
        file implementation and propagate its result."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl over HTTP/2 (pycurl) to a named file, or — when
        outfile is "-" — return the content in memory.

        Returns a 'Type': "File" dict (outfile given) or a 'Type': "Content"
        dict (outfile == "-"); returns False when the output path is invalid
        or the download failed.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        if(not outfile == "-"):
            # File mode: download to a temp file, then move into place.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Preserve the server's Last-Modified stamp on the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # BUGFIX: the dict literal had 'Method' twice (delegate's value,
            # then httpmethod); only the last one survived, so keep it alone.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile == "-"):
            # Stdout mode: download to a temp file, copy the bytes into
            # memory, delete the temp file and return the content.
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
            # BUGFIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback: delegate to the urllib to-file implementation (pycurl unavailable).

    BUGFIX: arguments are forwarded by keyword; the old positional call put
    buffersize in the outfile slot (per the sibling to-file signatures) and
    dropped ranges.
    NOTE(review): assumes the urllib variant shares its siblings' parameter
    names — confirm against its definition.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback: pycurl is present but lacks HTTP/2; use the plain pycurl
        to-file implementation.

        BUGFIX: the delegate's signature is (…, postdata, outfile, outpath,
        ranges, buffersize, sleep, timeout); the old positional call passed
        buffersize into outfile, outfile into outpath and outpath into
        ranges. Keyword forwarding restores the intended mapping.
        """
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/3 and return the response.

        Returns a dict with 'Type': "Content", the (decoded) body under
        'Content', plus metadata ('Headers', 'Version', 'Method',
        'HeadersSent', 'URL', 'Code', 'Reason') and 'HTTPLib': "pycurl3".
        Returns False on socket timeout/DNS failure or header-parse errors.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUGFIX: was httpuseragent.update({...}); a user-agent string
                # has no .update() — the header dict is the intended target.
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUGFIX: was httpuseragent.update({...}); see above.
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline userinfo in the URL becomes a Basic Authorization header.
            if(sys.version[0] == "2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0] >= "3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            # The GET, POST and default branches only differed in the two
            # POST-specific options, so the Curl setup is shared here.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            if(httpmethod == "POST"):
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0] == "2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0] >= "3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0] == "2"):
            # Python 2 header objects need rebuilding into a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently undo any Content-Encoding the server applied;
        # decode failures leave the raw bytes (best-effort, as before).
        if(httpheaderout.get("Content-Encoding") == "gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
            try:
                # BUGFIX: zstandard raises ZstdError; "zstandard.error" does
                # not exist and would raise AttributeError on failure.
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.ZstdError:
                pass
        elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
            try:
                # BUGFIX: was "except zstandard.error" around an lzma call.
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:
                pass
        elif(httpheaderout.get("Content-Encoding") == "bzip2"):
            try:
                # BUGFIX: was "except zstandard.error"; bz2.decompress raises
                # OSError/ValueError on malformed input.
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback: delegate to the urllib implementation (pycurl unavailable).

    Ensures the delegate's result dict (or False) is propagated to the caller.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback: no HTTP/3 support, but HTTP/2 is available — delegate to
        the pycurl2 implementation and propagate its result."""
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback: neither HTTP/3 nor HTTP/2 is available — delegate to the
        plain pycurl implementation and propagate its result."""
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via download_from_url_with_pycurl3 into a uniquely
        named temporary file.

        Returns a 'Type': "File" result dict pointing at the temp file (the
        caller is responsible for removing it), or False if the download
        failed.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Unique temp-file suffix derived from the URL, buffer size and time.
        myhash = hashlib.new("sha1")
        if(sys.version[0] == "2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0] >= "3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # Stamp the file with the server's Last-Modified time; the py2
            # path (no parsedate_to_datetime) falls back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            # NOTE(review): writing after os.utime resets the mtime just set
            # above — kept as-is to match the file-wide convention.
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Compatibility shim used when pycurl HTTP/3 support is unavailable.

    Delegates to the urllib-based implementation with an identical
    signature, forwarding every argument unchanged and returning its
    result directly.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """pycurl lacks HTTP/3 but supports HTTP/2: expose the pycurl3 name
        as a thin alias over the HTTP/2 implementation, passing all
        arguments straight through."""
        return download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """pycurl supports neither HTTP/2 nor HTTP/3: expose the pycurl3
        name as a thin alias over the plain HTTP/1.x pycurl
        implementation, passing all arguments straight through."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    # HTTP/3 pycurl path: download httpurl either to a named file on disk
    # (outfile != "-") or into memory (outfile == "-").
    # NOTE: several original source lines (else:/try:/return branches) are
    # elided from this excerpt; gaps are marked below.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        global geturls_download_sleep, havezstd, havebrotli;
        # (guard line, presumably "if(sleep<0):", elided in this excerpt)
        sleep = geturls_download_sleep;
        if(not outfile=="-"):
            # Writing to a real file: build the destination path and ensure
            # the parent directory exists.
            outpath = outpath.rstrip(os.path.sep);
            filepath = os.path.realpath(outpath+os.path.sep+outfile);
            if(not os.path.exists(outpath)):
                os.makedirs(outpath);
            # Refuse a destination directory that is actually a file ...
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                # (failure-path body elided in this excerpt)
            # ... or a destination file that is actually a directory.
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                # (failure-path body elided in this excerpt)
            # buffersize[0] is the download chunk size.
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            if(not pretmpfilename):
                # (failure-path body elided in this excerpt)
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            log.info("Moving file "+tmpfilename+" to "+filepath);
            exec_time_start = time.time();
            shutil.move(tmpfilename, filepath);
            # Preserve the server's Last-Modified timestamp on the moved file.
            # (the opening "try:" for the except below is elided in this excerpt)
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
            except AttributeError:
                # Fallback for Pythons without email.utils.parsedate_to_datetime.
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            exec_time_end = time.time();
            # NOTE(review): start - end is negative; elapsed time is end - start.
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename);
            # NOTE(review): 'Method' appears twice in this literal; the second
            # entry (httpmethod) silently overwrites the first.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        # ("if(outfile==\"-\"):" branch header elided in this excerpt)
            # In-memory variant: download to a temp file, then copy its bytes
            # into a buffer and delete the temp file.
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
            tmpfilename = pretmpfilename.get('Filename');
            downloadsize = int(os.path.getsize(tmpfilename));
            exec_time_start = time.time();
            with open(tmpfilename, 'rb') as ft:
                # (buffer setup and loop header elided in this excerpt)
                # buffersize[1] is the copy chunk size.
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
            # (buffer write/seek lines elided in this excerpt)
            fdata = f.getvalue();
            os.remove(tmpfilename);
            exec_time_end = time.time();
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
            # NOTE(review): duplicate 'Method' key here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback shim used when pycurl is unavailable.

    Delegates to the urllib-based to-file implementation with an identical
    signature and returns its result dict (or False on failure).
    """
    # BUGFIX: the original forwarded (postdata, buffersize, outfile, outpath,
    # sleep, timeout) positionally, which shifted every value after postdata
    # into the wrong parameter slot of the target function and silently
    # dropped `ranges`.  Forwarding by keyword makes each argument land on
    # the parameter of the same name regardless of positional order.
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    # pycurl supports HTTP/2 but not HTTP/3: provide the *_pycurl3 name as
    # an alias of the HTTP/2 implementation (mirrors the
    # download_from_url_file_with_pycurl3 shim defined above).
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Alias the HTTP/3 entry point to the HTTP/2 implementation."""
        # BUGFIX: the original defined download_from_url_to_file_with_pycurl2
        # here, which (a) shadowed the real HTTP/2 implementation defined
        # earlier in the file and (b) called itself, recursing until
        # RecursionError.  Per the guard's intent (and the sibling
        # download_from_url_file_with_pycurl3 shim) this must define the
        # pycurl3 name and delegate to pycurl2.  Arguments are forwarded by
        # keyword because the original positional order misaligned
        # buffersize/outfile/outpath and dropped ranges.
        return download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    # pycurl supports neither HTTP/2 nor HTTP/3: provide the *_pycurl3 name
    # as an alias of the plain HTTP/1.x implementation.
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Alias the HTTP/3 entry point to the HTTP/1.x implementation."""
        # BUGFIX: the original defined download_from_url_to_file_with_pycurl
        # here, shadowing the real implementation and calling itself in an
        # infinite recursion.  The guard's intent is to define the pycurl3
        # name falling back to the plain pycurl implementation.  Arguments
        # are forwarded by keyword because the original positional order
        # misaligned buffersize/outfile/outpath and dropped ranges.
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
def download_file_from_ftp_file(url):
    # Open an FTP/FTPS connection described by *url* and return a BytesIO
    # holding the remote file's contents.
    # NOTE: several original source lines (else:/try:/return branches) are
    # elided from this excerpt; gaps are marked below.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # Credentials default to anonymous when absent from the URL.
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    # ("else:" elided in this excerpt)
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    # ("else:" branch elided in this excerpt)
    if(urlparts.scheme=="ftp"):
        # (FTP client construction elided in this excerpt)
    elif(urlparts.scheme=="ftps"):
        # (FTP_TLS client construction / else branch elided in this excerpt)
    # HTTP(S) URLs are not handled by this helper.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        # (failure-path body elided in this excerpt)
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        # (default-port assignment and "try:" elided in this excerpt)
    ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # NOTE(review): httpurl is not defined in this function (the
        # parameter is `url`) — this log line would raise NameError.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    ftp.login(urlparts.username, urlparts.password);
    if(urlparts.scheme=="ftps"):
        # (secure-data-channel setup elided in this excerpt)
    ftpfile = BytesIO();
    # Stream the remote file into the in-memory buffer.
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    # (connection close / seek / return elided in this excerpt)
def download_file_from_ftp_string(url):
    """Fetch *url* over FTP and return the file body as a byte string."""
    buf = download_file_from_ftp_file(url);
    return buf.read();
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl over FTP and return a result dict with the body in
    # 'Content'; HTTP-specific fields are set to None.  The HTTP-style
    # header/user-agent parameters are normalised but not sent over FTP.
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # (guard line, presumably "if(sleep<0):", elided in this excerpt)
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalise header input: list -> dict, then canonical header names.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # ("else:" elided in this excerpt)
            # NOTE(review): this calls .update on the httpuseragent string
            # rather than on httpheaders — str has no .update(), so this
            # branch would raise AttributeError; looks like it should be
            # httpheaders.update({'User-Agent': httpuseragent}).
            httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # ("else:" elided in this excerpt)
            # NOTE(review): same wrong-receiver issue as above.
            httpuseragent.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_ftp_file(httpurl);
    if(not geturls_text):
        # (failure-path body elided in this excerpt)
    # FTP gives no Content-Length equivalent here, so size starts unknown.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # (progress counters initialisation elided in this excerpt)
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # (read loop header elided in this excerpt)
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        # NOTE(review): downloadsize can be 0 here, making this a
        # ZeroDivisionError risk.
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # (buffer rewind elided in this excerpt)
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    # (return statement elided in this excerpt)
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # FTP variant: download httpurl into a uniquely named temporary file and
    # return a result dict describing it.
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # SHA-1 of URL + buffer size + start time makes the temp-file suffix unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        # Python 3 hash objects only accept bytes, hence the explicit encode.
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # (guard line, presumably "if(sleep<0):", elided in this excerpt)
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # (failure-path body elided in this excerpt)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified header onto the temp file.
        # (the opening "try:" for the except below is elided in this excerpt)
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fallback for Pythons without email.utils.parsedate_to_datetime.
            # NOTE(review): the FTP result dict sets 'Headers' to None, so
            # either utime path would raise before reaching here — verify.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # NOTE(review): start - end is negative; elapsed time is end - start.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # FTP variant: download httpurl either to a named file on disk
    # (outfile != "-") or into memory (outfile == "-").
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # (guard line, presumably "if(sleep<0):", elided in this excerpt)
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Writing to a real file: build the destination path and ensure the
        # parent directory exists.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse a destination directory that is actually a file ...
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # (failure-path body elided in this excerpt)
        # ... or a destination file that is actually a directory.
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # (failure-path body elided in this excerpt)
        # buffersize[0] is the download chunk size.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # (failure-path body elided in this excerpt)
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # NOTE(review): start - end is negative; elapsed time is end - start.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): 'Method' appears twice; the second entry (None)
        # silently overwrites the first.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # ("if(outfile==\"-\"):" branch header elided in this excerpt)
        # In-memory variant: download to a temp file, copy its bytes into a
        # buffer, then delete the temp file.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            # (buffer setup and loop header elided in this excerpt)
            # buffersize[1] is the copy chunk size.
            databytes = ft.read(buffersize[1]);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
        # (buffer write/seek lines elided in this excerpt)
        fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # NOTE(review): duplicate 'Method' key here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
def upload_file_to_ftp_file(ftpfile, url):
    # Upload the file-like object *ftpfile* to the FTP/FTPS location
    # described by *url*.
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # Credentials default to anonymous when absent from the URL.
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    # ("else:" elided in this excerpt)
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    # ("else:" branch elided in this excerpt)
    if(urlparts.scheme=="ftp"):
        # (FTP client construction elided in this excerpt)
    elif(urlparts.scheme=="ftps"):
        # (FTP_TLS client construction / else branch elided in this excerpt)
    # HTTP(S) URLs are not handled by this helper.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        # (failure-path body elided in this excerpt)
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        # (default-port assignment and "try:" elided in this excerpt)
    ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # NOTE(review): httpurl is not defined in this function (the
        # parameter is `url`) — this log line would raise NameError.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    ftp.login(urlparts.username, urlparts.password);
    if(urlparts.scheme=="ftps"):
        # (secure-data-channel setup elided in this excerpt)
    # Stream the buffer to the remote path.
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    # (connection close / seek / return elided in this excerpt)
def upload_file_to_ftp_string(ftpstring, url):
    # Wrap the byte string *ftpstring* in a BytesIO and upload it through
    # upload_file_to_ftp_file.
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    # (trailing lines, presumably buffer close + return, are elided in this excerpt)
def download_file_from_sftp_file(url):
    # Download the file at the sftp:// *url* via paramiko and return a
    # BytesIO positioned at the start of the data.
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # HTTP(S) URLs are not handled by this helper.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        # (failure-path body elided in this excerpt)
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        # (default-port assignment and "else:" elided in this excerpt)
        sftp_port = urlparts.port;
    # Credentials default to anonymous when absent from the URL.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    # ("else:" elided in this excerpt)
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    # ("else:" branch elided in this excerpt)
    # Only the sftp scheme is supported.
    if(urlparts.scheme!="sftp"):
        # (failure-path body elided in this excerpt)
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    # Auto-accept unknown host keys (convenient, but skips host verification).
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    # (the opening "try:" for the excepts below is elided in this excerpt)
    ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        # (failure-path body elided in this excerpt)
    except socket.gaierror:
        # NOTE(review): httpurl is not defined in this function (the
        # parameter is `url`) — this log line would raise NameError.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    sftp = ssh.open_sftp();
    sftpfile = BytesIO();
    # Copy the remote file into the in-memory buffer.
    sftp.getfo(urlparts.path, sftpfile);
    # (connection close lines elided in this excerpt)
    # Rewind so callers can read from the start.
    sftpfile.seek(0, 0);
    # (return statement elided in this excerpt)
4961 def download_file_from_sftp_file(url
):
def download_file_from_sftp_string(url):
    """Fetch *url* over SFTP and return the file body as a byte string."""
    buf = download_file_from_sftp_file(url);
    return buf.read();
4969 def download_file_from_ftp_string(url
):
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl over SFTP and return a result dict with the body in
    # 'Content'; HTTP-specific fields are set to None.  The HTTP-style
    # header/user-agent parameters are normalised but not sent over SFTP.
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # (guard line, presumably "if(sleep<0):", elided in this excerpt)
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalise header input: list -> dict, then canonical header names.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # ("else:" elided in this excerpt)
            # NOTE(review): this calls .update on the httpuseragent string
            # rather than on httpheaders — str has no .update(); looks like
            # it should be httpheaders.update({'User-Agent': httpuseragent}).
            httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # ("else:" elided in this excerpt)
            # NOTE(review): same wrong-receiver issue as above.
            httpuseragent.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_sftp_file(httpurl);
    if(not geturls_text):
        # (failure-path body elided in this excerpt)
    # SFTP gives no Content-Length equivalent here, so size starts unknown.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # (progress counters initialisation elided in this excerpt)
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # (read loop header elided in this excerpt)
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        # NOTE(review): downloadsize can be 0 here, making this a
        # ZeroDivisionError risk.
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # (buffer rewind elided in this excerpt)
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    # (return statement elided in this excerpt)
5025 if(not haveparamiko
):
5026 def download_from_url_with_sftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # SFTP variant: download httpurl into a uniquely named temporary file
    # and return a result dict describing it.
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # SHA-1 of URL + buffer size + start time makes the temp-file suffix unique.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        # Python 3 hash objects only accept bytes, hence the explicit encode.
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # (guard line, presumably "if(sleep<0):", elided in this excerpt)
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # (failure-path body elided in this excerpt)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified header onto the temp file.
        # (the opening "try:" for the except below is elided in this excerpt)
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Fallback for Pythons without email.utils.parsedate_to_datetime.
            # NOTE(review): the SFTP result dict sets 'Headers' to None, so
            # either utime path would raise before reaching here — verify.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # NOTE(review): start - end is negative; elapsed time is end - start.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
5069 if(not haveparamiko
):
5070 def download_from_url_file_with_sftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # SFTP variant: download httpurl either to a named file on disk
    # (outfile != "-") or into memory (outfile == "-").
    # NOTE: several original source lines are elided from this excerpt;
    # gaps are marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # (guard line, presumably "if(sleep<0):", elided in this excerpt)
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Writing to a real file: build the destination path and ensure the
        # parent directory exists.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse a destination directory that is actually a file ...
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # (failure-path body elided in this excerpt)
        # ... or a destination file that is actually a directory.
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # (failure-path body elided in this excerpt)
        # buffersize[0] is the download chunk size.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # (failure-path body elided in this excerpt)
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # NOTE(review): start - end is negative; elapsed time is end - start.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): 'Method' appears twice; the second entry (None)
        # silently overwrites the first.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # ("if(outfile==\"-\"):" branch header elided in this excerpt)
        # In-memory variant: download to a temp file, copy its bytes into a
        # buffer, then delete the temp file.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            # (buffer setup and loop header elided in this excerpt)
            # buffersize[1] is the copy chunk size.
            databytes = ft.read(buffersize[1]);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
        # (buffer write/seek lines elided in this excerpt)
        fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # NOTE(review): duplicate 'Method' key here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub used when paramiko is not installed.

        SFTP downloads are unsupported in this configuration, so it always
        returns False.  The signature mirrors the real implementation so
        callers do not need to care which variant is bound.
        """
        # Body restored: the original stub's return statement was lost in a
        # formatting mangle; without it the function fell through to None.
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the open file object *sftpfile* to the path of an sftp:// URL.

    Returns the file object rewound to offset 0 on success, or False when the
    URL scheme is not sftp or the SSH connection fails.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    # NOTE(review): sftp_username/sftp_password are computed but connect()
    # below is given the raw urlparts credentials, matching the sibling
    # helpers in this file -- confirm this is intended.
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # Fixed: original logged the undefined name httpurl; this function's
        # parameter is url.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback stub bound when paramiko is unavailable; always fails.

    NOTE(review): in the extracted source the guarding conditional was lost;
    presumably this sits under an ``if(not haveparamiko):`` block -- confirm.
    """
    # Body restored: the stub's return statement was lost in a formatting
    # mangle; SFTP uploads are unsupported without paramiko.
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload a bytes payload to an sftp:// URL by wrapping it in a BytesIO.

    Returns the uploaded file object (as returned by
    upload_file_to_sftp_file) on success, or False on failure.
    """
    sftpfileo = BytesIO(sftpstring)
    # Fixed: original called the nonexistent upload_file_to_sftp_files()
    # and passed the undefined name ftpfileo (NameError at runtime).
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    if(not sftpfile):
        # Only close the buffer on failure; on success sftpfile is the same
        # (rewound) object and must stay readable for the caller.
        sftpfileo.close()
        return False
    return sftpfile
def upload_file_to_sftp_string(url):
    """Fallback stub bound when paramiko is unavailable; always fails.

    NOTE(review): signature differs from the real variant, which takes
    (sftpstring, url) -- confirm against callers.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def download_file_from_pysftp_file(url):
    """Download an sftp:// URL via pysftp into an in-memory buffer.

    Returns a BytesIO rewound to offset 0 on success, or False when the URL
    scheme is not sftp or the connection fails.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    try:
        # Fixed: original discarded the Connection object and then used an
        # undefined name ssh; keep the connection and use it directly.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # Fixed: original logged the undefined name httpurl; parameter is url.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_pysftp_file(url):
    """Fallback stub bound when pysftp is unavailable; always fails.

    NOTE(review): the guarding conditional was lost in extraction; presumably
    an ``if(not havepysftp):`` block -- confirm.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def download_file_from_pysftp_string(url):
    """Return the downloaded bytes of an sftp:// URL, or False on failure."""
    sftpfile = download_file_from_pysftp_file(url)
    if(not sftpfile):
        # Robustness fix: download_file_from_pysftp_file returns False on
        # failure; calling .read() on it would raise AttributeError.
        return False
    return sftpfile.read()
def download_file_from_ftp_string(url):
    """Fallback stub; always fails.

    NOTE(review): the name says ftp but it sits in the pysftp section --
    looks like a copy/paste slip in the original; kept as-is so any existing
    callers still resolve.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch an sftp:// URL with pysftp and return a result dict.

    On success returns a dict with Type "Content" holding the downloaded
    bytes plus metadata placeholders (SFTP has no HTTP headers/status);
    returns False when the download fails.  httpheaders/httpcookie/
    httpmethod/postdata are accepted for signature parity with the HTTP
    downloaders but are not sent over SFTP.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_pysftp_file(httpurl)
    if(not geturls_text):
        return False
    downloadsize = None  # SFTP transfer exposes no Content-Length equivalent
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                # Guard restored: without it the division below raises
                # ZeroDivisionError, since downloadsize is 0 here.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0, 0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub bound when pysftp is unavailable; always fails.

    NOTE(review): the guarding conditional was lost in extraction; presumably
    an ``if(not havepysftp):`` block -- confirm.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// URL into a uniquely named temporary file.

    Returns a result dict with Type "File" (Filename, Filesize, timing and
    pass-through metadata), or False when the underlying download fails.
    The temp file is NOT deleted here; callers consume and remove it.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 over URL + buffer size + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    # Fixed: the original also passed httpuseragent and httpreferer --
    # names that are neither parameters of this function nor accepted by
    # download_from_url_with_pysftp (NameError at runtime).
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified onto the temp file; SFTP
            # results carry Headers=None, so the AttributeError path is the
            # normal one here.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (AttributeError, TypeError, ValueError):
                pass  # no usable Last-Modified; keep the temp file's own mtime
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub bound when pysftp is unavailable; always fails.

    NOTE(review): the guarding conditional was lost in extraction; presumably
    an ``if(not havepysftp):`` block -- confirm.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an sftp:// URL to disk, or to memory when outfile is "-".

    With a real outfile the temp download is moved to outpath/outfile and a
    Type "File" dict is returned; with outfile="-" the temp file is read
    back into memory and a Type "Content" dict is returned.  Returns False
    on any failure.  buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # destination directory is actually a file
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # destination file is actually a directory
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # Fixed: the dict no longer lists 'Method' twice -- the duplicate
        # 'Method': None used to clobber the real value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()  # restored: f was undefined in the mangled source
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    # guard avoids ZeroDivisionError on an empty download
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0, 0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # Same duplicate-'Method'-key fix as in the file branch above.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub bound when pysftp is unavailable; always fails.

    NOTE(review): the guarding conditional was lost in extraction; presumably
    an ``if(not havepysftp):`` block -- confirm.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the open file object *sftpfile* to an sftp:// URL via pysftp.

    Returns the file object rewound to offset 0 on success, or False when
    the URL scheme is not sftp or the connection fails.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    try:
        # Fixed: original discarded the Connection object and then called
        # open_sftp() on an undefined name ssh; pysftp connections are used
        # directly for transfers.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # Fixed: original logged the undefined name httpurl; parameter is url.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_pysftp_file(sftpfile, url):
    """Fallback stub bound when pysftp is unavailable; always fails.

    NOTE(review): the guarding conditional was lost in extraction; presumably
    an ``if(not havepysftp):`` block -- confirm.
    """
    # Body restored: return statement was lost in a formatting mangle.
    return False
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload a bytes payload to an sftp:// URL (pysftp) via a BytesIO wrapper.

    Returns the uploaded file object (as returned by
    upload_file_to_pysftp_file) on success, or False on failure.
    """
    sftpfileo = BytesIO(sftpstring)
    # Fixed: original called the nonexistent upload_file_to_pysftp_files()
    # and passed the undefined name ftpfileo (NameError at runtime).
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    if(not sftpfile):
        # Only close the buffer on failure; on success sftpfile is the same
        # (rewound) object and must stay readable for the caller.
        sftpfileo.close()
        return False
    return sftpfile
5456 def upload_file_to_pysftp_string(url
):