4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
62 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
102 if(sys
.version
[0]=="2"):
104 from io
import StringIO
, BytesIO
;
107 from cStringIO
import StringIO
;
108 from cStringIO
import StringIO
as BytesIO
;
110 from StringIO
import StringIO
;
111 from StringIO
import StringIO
as BytesIO
;
112 # From http://python-future.org/compatible_idioms.html
113 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
114 from urllib
import urlencode
;
115 from urllib
import urlopen
as urlopenalt
;
116 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
117 import urlparse
, cookielib
;
118 from httplib
import HTTPConnection
, HTTPSConnection
;
119 if(sys
.version
[0]>="3"):
120 from io
import StringIO
, BytesIO
;
121 # From http://python-future.org/compatible_idioms.html
122 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
123 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
124 from urllib
.error
import HTTPError
, URLError
;
125 import urllib
.parse
as urlparse
;
126 import http
.cookiejar
as cookielib
;
127 from http
.client
import HTTPConnection
, HTTPSConnection
;
129 __program_name__
= "PyWWW-Get";
130 __program_alt_name__
= "PyWWWGet";
131 __program_small_name__
= "wwwget";
132 __project__
= __program_name__
;
133 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
134 __version_info__
= (2, 0, 2, "RC 1", 1);
135 __version_date_info__
= (2023, 10, 5, "RC 1", 1);
136 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
137 __revision__
= __version_info__
[3];
138 __revision_id__
= "$Id$";
139 if(__version_info__
[4] is not None):
140 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
141 if(__version_info__
[4] is None):
142 __version_date_plusrc__
= __version_date__
;
143 if(__version_info__
[3] is not None):
144 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
145 if(__version_info__
[3] is None):
146 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
148 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
150 pytempdir
= tempfile
.gettempdir();
# Interpreter bitness detection.
# NOTE(review): platform.architecture() returns a tuple such as
# ("64bit", "WindowsPE"), so comparing the tuple itself against
# "32bit"/"64bit" below cannot match unless upstream actually indexes [0];
# the assignment bodies of these branches are not visible in this chunk —
# confirm against the full file.
PyBitness = platform.architecture();
if(PyBitness=="32bit" or PyBitness=="32"):
elif(PyBitness=="64bit" or PyBitness=="64"):

# Content encodings advertised via the Accept-Encoding header.
# NOTE(review): the guards that conditionally append the optional codecs
# (brotli/zstd/lzma availability checks) are not visible in this chunk.
compression_supported_list = ['identity', 'gzip', 'deflate', 'bzip2'];
compression_supported_list.append('br');
compression_supported_list.append('zstd');
compression_supported_list.append('lzma');
compression_supported_list.append('xz');
compression_supported = ', '.join(compression_supported_list);
# Shared cookie jar used by all download helpers.
geturls_cj = cookielib.CookieJar();
# Per-Windows-release User-Agent fragments plus the matching client-hint
# (SEC-CH-UA-*) headers merged into the browser header dicts below.
# Bug fix: the platform version was previously stored under a second
# 'SEC-CH-UA-PLATFORM' key, which silently overwrote the platform name
# ("Windows") with the version string; it now uses the correct
# 'SEC-CH-UA-PLATFORM-VERSION' key.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# Bug fix: version corrected from "5.1.0" to "5.2.0" to match NT 5.2 above.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Browser User-Agent strings impersonating common Windows 7 browsers, plus
# this program's own self-identifying User-Agent. The exact byte content of
# these strings is protocol-visible, so they are left untouched.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Honest self-identifying UA of the form
# "Mozilla/5.0 (compatible; PyWWW-Get/<version>; +<project url>)".
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Name of the running Python implementation ("CPython", "PyPy", ...);
# fall back to the generic "Python" when platform reports an empty string.
if(platform.python_implementation()==""):
    py_implementation = "Python";
else:
    py_implementation = platform.python_implementation();
# Alternate self-identifying UA exposing OS, machine architecture and
# interpreter name/version alongside the project name/version.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot crawler UA strings (current and legacy forms).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used when callers do not pick a specific one.
geturls_ua = geturls_ua_firefox_windows7;
# Ready-made request-header dicts, one per impersonated browser. All share
# the same Accept-* values; the Chromium-family ones also carry client-hint
# (SEC-CH-UA-*) headers and are extended with the Windows 7 platform hints
# via .update().
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Self-identifying header sets (no compression advertised).
# Bug fix: the interpreter version hint was previously stored under a second
# 'SEC-CH-UA-PLATFORM' key, silently overwriting the platform name; it now
# uses 'SEC-CH-UA-PLATFORM-VERSION'.
# NOTE(review): PyBitness is assumed to be normalised to "32"/"64" earlier
# in the file (the normalisation branches are not visible in this chunk).
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Default header set and default inter-download sleep (seconds).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    # Emit a diagnostic message.
    # dbgtxt: text to emit; outtype: destination selector ("print", "log",
    # "warning", "error", "critical", "exception", "logalt", "debug");
    # dbgenable: when False nothing is emitted; dgblevel: numeric logging
    # level used only by the "logalt" route.
    # NOTE(review): the "print" branch body, lines between branches and the
    # trailing else/return are not visible in this chunk. Also the visible
    # import is "import logging as log", so the bare "logging" name used
    # below must be provided elsewhere in the full file — confirm.
    if(outtype=="print" and dbgenable):
    elif(outtype=="log" and dbgenable):
        logging.info(dbgtxt);
    elif(outtype=="warning" and dbgenable):
        logging.warning(dbgtxt);
    elif(outtype=="error" and dbgenable):
        logging.error(dbgtxt);
    elif(outtype=="critical" and dbgenable):
        logging.critical(dbgtxt);
    elif(outtype=="exception" and dbgenable):
        logging.exception(dbgtxt);
    elif(outtype=="logalt" and dbgenable):
        logging.log(dgblevel, dbgtxt);
    elif(outtype=="debug" and dbgenable):
        logging.debug(dbgtxt);
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    # Same routing as verbose_printout(), but intended to also hand a value
    # back to the caller. NOTE(review): the lines after the call (presumably
    # the return) are not visible in this chunk.
    dbgout = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel);
def add_url_param(url, **params):
    # Merge the given keyword arguments into url's query string and return
    # the rebuilt URL. NOTE(review): the line defining n (presumably the
    # index of the query component in the urlsplit tuple) and the
    # d.update(params) line are not visible in this chunk.
    parts = list(urlparse.urlsplit(url));
    d = dict(cgi.parse_qsl(parts[n])); # use cgi.parse_qs for list values
    parts[n]=urlencode(d);
    return urlparse.urlunsplit(parts);
# Extend PATH with this script's own directory and the current working
# directory so executables that live beside the script can be found.
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], os.path.dirname(os.path.realpath(__file__)), os.getcwd()]);
def which_exec(execfile):
    # Search PATH for an executable named execfile and return its full path,
    # or None when it is not found anywhere on PATH.
    # Bug fix: PATH was split on a literal ":", which is wrong on Windows
    # (";" there); os.pathsep handles both platforms. Candidate paths are
    # built with os.path.join instead of "/" concatenation for the same
    # reason. The not-found return is now an explicit None.
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
def listize(varlist):
    # Build forward ({index: value}) and reverse ({value: index}) lookup
    # dicts from varlist and bundle them as {1/'reg': forward,
    # 2/'rev': reverse}. NOTE(review): the dict initialisations, the loop
    # header, the il/ilx counter bookkeeping and the return statement are
    # not visible in this chunk.
        newlistreg.update({ilx: varlist[il]});
        newlistrev.update({varlist[il]: ilx});
    newlistfull = {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev};
def twolistize(varlist):
    # Like listize(), but varlist holds (name, description) pairs: builds
    # forward/reverse lookups for both fields (values whitespace-stripped)
    # and bundles them under 1/'name' and 2/'desc'. NOTE(review): the dict
    # initialisations, loop header, counter bookkeeping and the return are
    # not visible in this chunk.
        newlistnamereg.update({ilx: varlist[il][0].strip()});
        newlistnamerev.update({varlist[il][0].strip(): ilx});
        newlistdescreg.update({ilx: varlist[il][1].strip()});
        newlistdescrev.update({varlist[il][1].strip(): ilx});
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev};
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev};
    newlistfull = {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp}
def arglistize(proexec, *varlist):
    # Flatten (option, value) pairs into an argv-style list starting with
    # proexec; None entries are skipped. NOTE(review): the loop header, the
    # il counter bookkeeping and the return statement are not visible in
    # this chunk.
    newarglist = [proexec];
        if varlist[il][0] is not None:
            newarglist.append(varlist[il][0]);
        if varlist[il][1] is not None:
            newarglist.append(varlist[il][1]);
def fix_header_names(header_dict):
    # Normalise header names to canonical Title-Case via str.title()
    # (e.g. "content-type" -> "Content-Type"), picking the dict-iteration
    # method that exists on the running interpreter (sys.version[0] is the
    # first character of the version string).
    # NOTE(review): the return statement (presumably "return header_dict;")
    # is not visible in this chunk.
    if(sys.version[0]=="2"):
        header_dict = {k.title(): v for k, v in header_dict.iteritems()};
    if(sys.version[0]>="3"):
        header_dict = {k.title(): v for k, v in header_dict.items()};
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Return the elapsed seconds rendered as "H:MM:SS.ss" (e.g. "1:01:05.25")."""
    full_hours = int(sec_elapsed / 3600)
    full_minutes = int((sec_elapsed % 3600) / 60)
    remaining_seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(full_hours, full_minutes, remaining_seconds)
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    # Convert a byte count into a dict of the raw count plus human-readable
    # forms: {'Bytes', 'ReadableWithSuffix', 'ReadableWithoutSuffix',
    # 'ReadableSuffix'}. bytes: numeric count (shadows the builtin name);
    # precision: decimal places; unit: "IEC" (KiB...) or "SI" (kB...).
    # NOTE(review): the unit-validation fallback, the IEC/SI selection
    # guards, the orgbytes/unitsize assignments, the loop over the suffix
    # tables and both return statements are not visible in this chunk; the
    # indentation below is a best-effort reconstruction.
    if(unit!="IEC" and unit!="SI"):
    # (fallback assignment elided in this chunk)
        units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
        unitswos = ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
        units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
        unitswos = ["B","kB","MB","GB","TB","PB","EB","ZB"];
        if abs(bytes) < unitsize:
            strformat = "%3."+str(precision)+"f%s";
            pre_return_val = (strformat % (bytes, unit));
            # Strip all-zero fractional parts and the dangling dot before the suffix.
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
            alt_return_val = pre_return_val.split();
            return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
    # Fallthrough for values beyond the largest table entry: report in YiB.
    strformat = "%."+str(precision)+"f%s";
    pre_return_val = (strformat % (bytes, "YiB"));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    # Like get_readable_size(), but sized from a file on disk; optionally
    # adds hex digests of the file contents keyed by upper-cased algorithm
    # name for each entry in usehashtypes (comma-separated, e.g. "md5,sha1").
    # NOTE(review): the usehashes guard, the listnumcount initialisation and
    # increment, the file close and the return are not visible in this
    # chunk; as shown, the opened handle is never visibly closed — confirm
    # against the full file.
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    hashtypelist = usehashtypes.split(",");
    openfile = open(infile, "rb");
    filecontents = openfile.read();
    listnumend = len(hashtypelist);
    while(listnumcount < listnumend):
        hashtypelistlow = hashtypelist[listnumcount].strip();
        hashtypelistup = hashtypelistlow.upper();
        filehash = hashlib.new(hashtypelistup);
        filehash.update(filecontents);
        filegethash = filehash.hexdigest();
        return_val.update({hashtypelistup: filegethash});
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    # Like get_readable_size(), but sized from an in-memory string;
    # optionally adds hex digests of the string (UTF-8 encoded on Python 3)
    # keyed by upper-cased algorithm name.
    # NOTE(review): the usehashes guard, the listnumcount initialisation and
    # increment, and the return are not visible in this chunk.
    usehashtypes = usehashtypes.lower();
    getfilesize = len(instring);
    return_val = get_readable_size(getfilesize, precision, unit);
    hashtypelist = usehashtypes.split(",");
    listnumend = len(hashtypelist);
    while(listnumcount < listnumend):
        hashtypelistlow = hashtypelist[listnumcount].strip();
        hashtypelistup = hashtypelistlow.upper();
        filehash = hashlib.new(hashtypelistup);
        if(sys.version[0]=="2"):
            filehash.update(instring);
        if(sys.version[0]>="3"):
            filehash.update(instring.encode('utf-8'));
        filegethash = filehash.hexdigest();
        return_val.update({hashtypelistup: filegethash});
def http_status_to_reason(code):
    """Map an HTTP status code to its standard reason phrase.

    Returns 'Unknown Status Code' for anything not in the table. Entries
    elided from this chunk of the source were restored from the IANA HTTP
    Status Code Registry; all entries visible in the source are unchanged.
    """
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    }
    return reasons.get(code, 'Unknown Status Code')
def ftp_status_to_reason(code):
    """Map an FTP reply code (RFC 959) to its standard reply text.

    Returns 'Unknown Status Code' for anything not in the table. Entries
    elided from this chunk of the source (200, 213, 214) were restored from
    RFC 959; all entries visible in the source are unchanged.
    """
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    }
    return reasons.get(code, 'Unknown Status Code')
def sftp_status_to_reason(code):
    """Map an SFTP SSH_FXP_STATUS code to its SSH_FX_* symbolic name.

    Returns 'Unknown Status Code' for anything not in the table. Entries
    elided from this chunk of the source (0, 1, 4) were restored from the
    SFTP Internet-Draft (draft-ietf-secsh-filexfer); all entries visible in
    the source are unchanged.
    """
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    }
    return reasons.get(code, 'Unknown Status Code')
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    # Convert a {name: value} header dict into a list of (name, value)
    # tuples; a list argument is handled by the elif branch below.
    # NOTE(review): mutable default argument — the shared dict would leak
    # state across calls if any caller mutated it. The returnval
    # initialisation, the list-branch body and the return are not visible in
    # this chunk.
    if isinstance(headers, dict):
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append((headkey, headvalue));
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append((headkey, headvalue));
    elif isinstance(headers, list):
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    # Convert a {name: value} header dict into pycurl's expected list of
    # "Name: value" strings; a list argument is handled by the elif branch.
    # NOTE(review): mutable default argument shared across calls. The
    # returnval initialisation, the list-branch body and the return are not
    # visible in this chunk.
    if isinstance(headers, dict):
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append(headkey+": "+headvalue);
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append(headkey+": "+headvalue);
    elif isinstance(headers, list):
def make_http_headers_from_pycurl_to_dict(headers):
    # Parse a raw response-header blob (CRLF-separated "Name: value" lines,
    # as collected from pycurl) into a dict keyed by Title-Cased names.
    # NOTE(review): the header_dict initialisation, the guard between the
    # split and the assignment (presumably checking len(parts) and unpacking
    # key/value), and the return are not visible in this chunk.
    headers = headers.strip().split('\r\n');
    for header in headers:
        parts = header.split(': ', 1)
            header_dict[key.title()] = value;
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    # Convert a list of (name, value) tuples into a header dict; a dict
    # argument is handled by the elif branch below.
    # NOTE(review): mutable default argument shared across calls. The
    # returnval initialisation, the mli loop scaffolding, the dict-branch
    # body and the return are not visible in this chunk.
    if isinstance(headers, list):
            returnval.update({headers[mli][0]: headers[mli][1]});
    elif isinstance(headers, dict):
def get_httplib_support(checkvalue=None):
    # Build the list of transport backend names usable in this runtime;
    # when checkvalue is given, normalise its aliases and (per the visible
    # tail) test membership instead.
    # NOTE(review): the returnval initialisation, the feature-flag guards
    # (haverequests, havehttplib2, ...) that gate most append calls, and the
    # final return statements are not visible in this chunk.
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    returnval.append("ftp");
    returnval.append("httplib");
    returnval.append("httplib2");
    returnval.append("urllib");
    returnval.append("urllib3");
    returnval.append("request3");
    returnval.append("request");
    returnval.append("requests");
    returnval.append("aiohttp");
    returnval.append("httpx");
    returnval.append("httpx2");
    returnval.append("mechanize");
    returnval.append("pycurl");
    # pycurl2/pycurl3 are only offered when libcurl exposes HTTP/2 / HTTP/3.
    if(hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        returnval.append("pycurl2");
    if(hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
        returnval.append("pycurl3");
    returnval.append("sftp");
    returnval.append("pysftp");
    if(not checkvalue is None):
        # Alias normalisation: urllib1/urllib2 -> urllib, httplib1 -> httplib.
        if(checkvalue=="urllib1" or checkvalue=="urllib2"):
            checkvalue = "urllib";
        if(checkvalue=="httplib1"):
            checkvalue = "httplib";
        if(checkvalue in returnval):
def check_httplib_support(checkvalue="urllib"):
    # Normalise backend aliases then delegate to get_httplib_support() with
    # the single value to test. NOTE(review): the return statement is not
    # visible in this chunk.
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    if(checkvalue=="httplib1"):
        checkvalue = "httplib";
    returnval = get_httplib_support(checkvalue);
def get_httplib_support_list():
    # Convenience wrapper: the full list of usable backends (no membership
    # check). NOTE(review): the return statement is not visible in this chunk.
    returnval = get_httplib_support(None);
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    # Front-end dispatcher: normalise the requested backend name, downgrade
    # to urllib/httplib when the chosen backend's module is unavailable,
    # then delegate to the matching download_from_url_with_*() helper.
    # NOTE(review): the defaults geturls_headers and geturls_cj are shared
    # module-level objects — mutations by any call would be visible to all
    # callers. The sleep/timeout guard lines, the sftp/pysftp fallback
    # bodies and the final else/return are not visible in this chunk.
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    # (guard line elided in this chunk; sleep=-1 is replaced by the default)
        sleep = geturls_download_sleep;
    # Alias normalisation: urllib1/urllib2/request -> urllib, httplib1 -> httplib.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Fall back to urllib when the optional client library is missing.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    # Step pycurl3 -> pycurl2 -> pycurl based on libcurl's HTTP/2 / HTTP/3 support.
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    # (fallback bodies for the next two guards elided in this chunk)
    if(not haveparamiko and httplibuse=="sftp"):
    if(not havepysftp and httplibuse=="pysftp"):
    # Dispatch to the chosen backend helper.
    # NOTE(review): the 'elif(httplibuse=="request")' branch and the
    # "request3" half of the urllib3 condition are dead code — "request" is
    # rewritten to "urllib" above and also matched by the first branch, and
    # "request3" is consumed by its own earlier branch.
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        returnval = download_from_url_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="aiohttp"):
        returnval = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_from_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Run download_from_url() over a batch of URLs.

    httpurl may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs.  All remaining parameters are forwarded
    unchanged to download_from_url().  Returns a list with one result
    (dict or False) per URL, in input order.
    """
    if(isinstance(httpurl, dict)):
        # BUGFIX: dict.values() is not subscriptable on Python 3, but the
        # original indexed it by position; materialize it into a list.
        httpurl = list(httpurl.values());
    elif(not isinstance(httpurl, (list, tuple))):
        # A lone URL string becomes a one-element batch.
        httpurl = [httpurl];
    returnval = [];
    for cururl in httpurl:
        returnval.append(download_from_url(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize, sleep, timeout));
    return returnval;
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a temporary file using the backend named by
    httplibuse.

    Backend aliases are normalized ("urllib1"/"urllib2"/"request" ->
    "urllib", "httplib1" -> "httplib"); backends whose third-party module
    is not installed silently fall back to the stdlib equivalents.  The
    pycurl HTTP/3 and HTTP/2 variants are degraded to whatever the local
    libcurl actually supports.  Delegates to the matching
    download_from_url_file_with_* helper and returns its result dict;
    returns False for an unknown backend or missing SFTP support.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # Collapse backend aliases to canonical names.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Fall back to the stdlib when the requested backend is unavailable.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and (httplibuse=="httpx" or httplibuse=="httpx2")):
        httplibuse = "urllib";
    if(not havehttpcore and (httplibuse=="httpcore" or httplibuse=="httpcore2")):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and (httplibuse=="pycurl" or httplibuse=="pycurl2" or httplibuse=="pycurl3")):
        httplibuse = "urllib";
    # Degrade the pycurl HTTP/3 / HTTP/2 variants to what libcurl supports.
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
        httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl";
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    # BUGFIX: pysftp availability was previously tested with haveparamiko
    # (the sibling download_from_url_to_file correctly uses havepysftp).
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    # Dispatch table replaces the long if/elif chain.  The "request" and
    # duplicate "request3" branches of the old chain were unreachable
    # because the aliases are normalized above.
    backends = {
        "urllib": download_from_url_file_with_urllib,
        "request3": download_from_url_file_with_request3,
        "httplib": download_from_url_file_with_httplib,
        "httplib2": download_from_url_file_with_httplib2,
        "urllib3": download_from_url_file_with_urllib3,
        "requests": download_from_url_file_with_requests,
        "aiohttp": download_from_url_file_with_aiohttp,
        "httpx": download_from_url_file_with_httpx,
        "httpx2": download_from_url_file_with_httpx2,
        "httpcore": download_from_url_file_with_httpcore,
        "httpcore2": download_from_url_file_with_httpcore2,
        "mechanize": download_from_url_file_with_mechanize,
        "pycurl": download_from_url_file_with_pycurl,
        "pycurl2": download_from_url_file_with_pycurl2,
        "pycurl3": download_from_url_file_with_pycurl3,
        "ftp": download_from_url_file_with_ftp,
        "sftp": download_from_url_file_with_sftp,
        "pysftp": download_from_url_file_with_pysftp,
    };
    backend = backends.get(httplibuse);
    if(backend is None):
        return False;
    return backend(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Run download_from_url_file() over a batch of URLs.

    httpurl may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs.  All remaining parameters are forwarded
    unchanged.  Returns a list with one result (dict or False) per URL,
    in input order.
    """
    if(isinstance(httpurl, dict)):
        # BUGFIX: dict.values() is not subscriptable on Python 3, but the
        # original indexed it by position; materialize it into a list.
        httpurl = list(httpurl.values());
    elif(not isinstance(httpurl, (list, tuple))):
        # A lone URL string becomes a one-element batch.
        httpurl = [httpurl];
    returnval = [];
    for cururl in httpurl:
        returnval.append(download_from_url_file(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize, sleep, timeout));
    return returnval;
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl into outpath/outfile (or, when outfile is "-",
    return the content in-memory) using the backend named by httplibuse.

    Backend aliases are normalized and unavailable backends fall back to
    the stdlib exactly as in download_from_url_file().  Delegates to the
    matching download_from_url_to_file_with_* helper and returns its
    result dict; returns False for an unknown backend or missing SFTP
    support.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # Collapse backend aliases to canonical names.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Fall back to the stdlib when the requested backend is unavailable.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and (httplibuse=="httpx" or httplibuse=="httpx2")):
        httplibuse = "urllib";
    if(not havehttpcore and (httplibuse=="httpcore" or httplibuse=="httpcore2")):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and (httplibuse=="pycurl" or httplibuse=="pycurl2" or httplibuse=="pycurl3")):
        httplibuse = "urllib";
    # Degrade the pycurl HTTP/3 / HTTP/2 variants to what libcurl supports.
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
        httplibuse = "pycurl2" if hasattr(pycurl, "CURL_HTTP_VERSION_2_0") else "pycurl";
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    # Dispatch table replaces the long if/elif chain.
    # BUGFIX: the old httpx/httpx2/httpcore/httpcore2 branches omitted
    # outfile/outpath from the call, shifting ranges into the outfile
    # argument slot; every backend now receives the full argument list.
    # NOTE(review): assumes all to_file helpers share the mechanize/pycurl
    # branch signature (outfile, outpath present) — confirm against the
    # helper definitions.
    backends = {
        "urllib": download_from_url_to_file_with_urllib,
        "request3": download_from_url_to_file_with_request3,
        "httplib": download_from_url_to_file_with_httplib,
        "httplib2": download_from_url_to_file_with_httplib2,
        "urllib3": download_from_url_to_file_with_urllib3,
        "requests": download_from_url_to_file_with_requests,
        "aiohttp": download_from_url_to_file_with_aiohttp,
        "httpx": download_from_url_to_file_with_httpx,
        "httpx2": download_from_url_to_file_with_httpx2,
        "httpcore": download_from_url_to_file_with_httpcore,
        "httpcore2": download_from_url_to_file_with_httpcore2,
        "mechanize": download_from_url_to_file_with_mechanize,
        "pycurl": download_from_url_to_file_with_pycurl,
        "pycurl2": download_from_url_to_file_with_pycurl2,
        "pycurl3": download_from_url_to_file_with_pycurl3,
        "ftp": download_from_url_to_file_with_ftp,
        "sftp": download_from_url_to_file_with_sftp,
        "pysftp": download_from_url_to_file_with_pysftp,
    };
    backend = backends.get(httplibuse);
    if(backend is None):
        return False;
    return backend(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_list(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Run download_from_url_to_file() over a batch of URLs.

    httpurl may be a single URL string, a list/tuple of URLs, or a dict
    whose values are URLs.  All remaining parameters are forwarded
    unchanged.  Returns a list with one result (dict or False) per URL,
    in input order.
    """
    if(isinstance(httpurl, dict)):
        # BUGFIX: dict.values() is not subscriptable on Python 3, but the
        # original indexed it by position; materialize it into a list.
        httpurl = list(httpurl.values());
    elif(not isinstance(httpurl, (list, tuple))):
        # A lone URL string becomes a one-element batch.
        httpurl = [httpurl];
    returnval = [];
    for cururl in httpurl:
        returnval.append(download_from_url_to_file(cururl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, outfile, outpath, ranges, buffersize, sleep, timeout));
    return returnval;
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the stdlib urllib backend.

    Builds a cookie-aware opener, applies User-Agent/Referer overrides and
    Basic auth taken from URL credentials, streams the body in buffersize
    chunks, transparently decompresses gzip/deflate/brotli/zstd/lzma/bzip2
    bodies, and returns a result dict ('Type': "Content", plus headers,
    status, sizes).  Returns False on URLError or socket timeout; an
    HTTPError response is kept and reported via its status code.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...), which called .update on
            # the user-agent string instead of the header dict.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update(...) — wrong object again.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        geturls_request = Request(httpurl);
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(geturls_request);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata);
        else:
            geturls_text = geturls_opener.open(geturls_request);
    except HTTPError as geturls_text_error:
        # Keep the error response object; its status code is reported below.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    try:
        httpcodereason = geturls_text.reason;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode());
    try:
        httpversionout = geturls_text.version;
    except AttributeError:
        httpversionout = "1.1";
    httpmethodout = geturls_request.get_method();
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    httpheaderout = fix_header_names(httpheaderout);
    if(sys.version[0]=="2"):
        # Python 2 mimetools.Message is not a real dict; rebuild one.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Best-effort transparent decompression by Content-Encoding; on any
    # decode failure the raw bytes are returned unchanged.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" — wrong module's exception
            # (and a NameError when zstandard is not installed).
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the urllib backend and spool the body into a
    uniquely named temporary file.

    Returns the download_from_url_with_urllib() result dict rewritten as
    'Type': "File" with the temp-file name and on-disk size, or False when
    the download failed.  The temp file is NOT deleted; callers own it.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified stamp onto the temp file
            # (parse once instead of twice as before).
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
            os.utime(tmpfilename, (modtime, modtime));
        except AttributeError:
            # parsedate_to_datetime missing (Py2) or returned None; retry
            # with an explicit strptime of the RFC 1123 format.
            try:
                modtime = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(tmpfilename, (modtime, modtime));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: duration was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the urllib backend to outpath/outfile, or —
    when outfile is "-" — return the content in-memory.

    buffersize is a pair: [download chunk size, copy chunk size].  Returns
    a result dict ('Type' "File" or "Content"), or False when the paths
    are unusable or the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # --- Save to a real file under outpath. ---
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified stamp on the final file.
            lastmod = pretmpfilename.get('Headers').get('Last-Modified');
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
            os.utime(filepath, (modtime, modtime));
        except AttributeError:
            try:
                modtime = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(filepath, (modtime, modtime));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: duration was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice; the effective
        # (last-wins) value httpmethod is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    else:
        # --- outfile "-": stream the temp file back into memory. ---
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes:
                    break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: duration was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here too (httpmethod kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib and return a result dict with keys
    'Type', 'Content', 'Contentsize', 'ContentsizeAlt', 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code', 'Reason', 'HTTPLib'.
    Returns False on connection errors or unsupported URL schemes."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...), which fails on a str
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update(...) — wrong target object
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from URL credentials
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were being sent with the "GET" verb
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except BlockingIOError:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    if(geturls_text.version=="10"):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = geturls_text._method;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparent decompression based on the server's Content-Encoding
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: was catching zstandard.error for lzma/xz data
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, IOError):
            # BUGFIX: was catching zstandard.error for bzip2 data
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib into a uniquely-named temporary file and
    return a result dict ('Type': "File", 'Filename', 'Filesize', headers and
    timing info). Returns False when the underlying download fails."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep < 0):
        sleep = geturls_download_sleep;
    # BUGFIX: was delegating to download_from_url_with_urllib, silently
    # bypassing httplib (the httplib2 sibling calls its own backend)
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified timestamp onto the temp file
        lastmod = pretmpfilename.get('Headers').get('Last-Modified');
        try:
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
            os.utime(tmpfilename, (modtime, modtime));
        except AttributeError:
            try:
                modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(tmpfilename, (modtime, modtime));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: durations were computed as start - end (negative)
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httplib to outpath/outfile, or — when outfile is
    "-" — return the content in-memory. Returns a result dict, or False on
    failure or when the destination path is invalid."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Download to a temp file, then move it into place
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the final file
        lastmod = pretmpfilename.get('Headers').get('Last-Modified');
        try:
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
            os.utime(filepath, (modtime, modtime));
        except AttributeError:
            try:
                modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(filepath, (modtime, modtime));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: durations were computed as start - end (negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: dict literal had a duplicate 'Method' key; only the
        # effective entry ('Method': httpmethod) is kept
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # Download to a temp file, read it back into memory, then delete it
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib2's timeout-aware connection classes and
    return the standard result dict ('HTTPLib': "httplib2").
    Returns False on connection errors or unsupported URL schemes."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...), which fails on a str
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update(...) — wrong target object
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from URL credentials
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were being sent with the "GET" verb
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except BlockingIOError:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    if(geturls_text.version=="10"):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparent decompression based on the server's Content-Encoding
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: was catching zstandard.error for lzma/xz data
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, IOError):
            # BUGFIX: was catching zstandard.error for bzip2 data
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"};
    geturls_text.close();
    return returnval;
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib
        backend with the same arguments and propagate its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
        return returnval;
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib2 into a uniquely-named temporary file and
    return a result dict ('Type': "File", 'Filename', 'Filesize', headers and
    timing info). Returns False when the underlying download fails."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep < 0):
        sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified timestamp onto the temp file
        lastmod = pretmpfilename.get('Headers').get('Last-Modified');
        try:
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
            os.utime(tmpfilename, (modtime, modtime));
        except AttributeError:
            try:
                modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(tmpfilename, (modtime, modtime));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: durations were computed as start - end (negative)
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib
        file-download backend and propagate its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httplib2 to outpath/outfile, or — when outfile is
    "-" — return the content in-memory. Returns a result dict, or False on
    failure or when the destination path is invalid."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Download to a temp file, then move it into place
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the final file
        lastmod = pretmpfilename.get('Headers').get('Last-Modified');
        try:
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
            os.utime(filepath, (modtime, modtime));
        except AttributeError:
            try:
                modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
                os.utime(filepath, (modtime, modtime));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: durations were computed as start - end (negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: dict literal had a duplicate 'Method' key; only the
        # effective entry ('Method': httpmethod) is kept
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # Download to a temp file, read it back into memory, then delete it
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib
        to-file backend and propagate its result."""
        # BUGFIX: arguments were passed as (..., postdata, buffersize,
        # outfile, outpath, sleep, timeout), misaligning buffersize with the
        # outfile/outpath/ranges parameters of the to_file signature used
        # throughout this file
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
        return returnval;
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: delegate to the urllib downloader and propagate its
    result dict (or False on failure)."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    return returnval;
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: delegate to the urllib file downloader and propagate
    its result dict (or False on failure)."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    return returnval;
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend: delegate to the urllib to-file downloader and propagate
    its result dict (or False on failure)."""
    # BUGFIX: arguments were passed as (..., postdata, buffersize, outfile,
    # outpath, sleep, timeout), misaligning buffersize with the
    # outfile/outpath/ranges parameters of the to_file signature used
    # throughout this file
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the requests library (streaming) and return the
    standard result dict ('HTTPLib': "requests").
    Returns False on connection errors."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...), which fails on a str
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update(...) — wrong target object
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from URL credentials
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        reqsession = requests.Session();
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
        else:
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except requests.exceptions.ConnectionError:
        # BUGFIX: was requests.exceptions.ConnectError, which does not exist
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason;
    if(geturls_text.raw.version=="10"):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.url;
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.raw.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparent decompression based on the server's Content-Encoding
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: was catching zstandard.error for lzma/xz data
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, IOError):
            # BUGFIX: was catching zstandard.error for bzip2 data
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"};
    geturls_text.close();
    return returnval;
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests module is unavailable: delegate the
        whole download to the urllib implementation and return its result dict
        (or False on failure), keeping the same signature as the real version."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUGFIX: propagate the delegated result instead of dropping it.
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the requests backend and spool the body into a
    uniquely named temporary file.

    Returns the result dict from the in-memory download, re-keyed for a file
    result ('Type': "File", 'Filename', 'Filesize', ...), or False when the
    underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Preserve the server's Last-Modified stamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Older Pythons lack parsedate_to_datetime; parse RFC 1123 by hand.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # best-effort: missing/malformed Last-Modified header
        except (TypeError, ValueError):
            pass  # best-effort: missing/malformed Last-Modified header
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests module is unavailable: delegate the
        to-temp-file download to the urllib implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: propagate the delegated result instead of dropping it.
        return returnval
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the requests backend into outpath/outfile.

    When outfile is "-" the body is returned in-memory ('Type': "Content")
    instead of being moved to a destination file ('Type': "File").
    buffersize is a pair: [download chunk size, copy-back chunk size].
    Returns a result dict, or False on failure / invalid destination.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse destinations that cannot receive a file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified stamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # best-effort: missing/malformed Last-Modified header
        except (TypeError, ValueError):
            pass  # best-effort: missing/malformed Last-Modified header
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the original dict listed 'Method' twice; only the second
        # ('Method': httpmethod) survived, so that is the value kept here.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        # BUGFIX: guard the failure case on this path too.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # Copy the temp file back into memory in chunks, logging progress.
        with open(tmpfilename, 'rb') as ft:
            strbuf = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                # BUGFIX: avoid ZeroDivisionError when the file is empty.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            fdata = strbuf.getvalue()
            strbuf.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the requests module is unavailable: delegate the
        download-to-file to the urllib implementation and return its result."""
        # BUGFIX: pass the tail arguments by keyword -- the original positional
        # call dropped `ranges` and passed buffersize/outfile/outpath in an
        # order that does not match this module's own signatures.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via aiohttp and return the body fully in memory.

    Returns a result dict ('Type': "Content", 'Content', 'Headers', ...) or
    False when the connection fails. The body is transparently decompressed
    for gzip/deflate/br/zstd/lzma/xz/bzip2 Content-Encoding values when the
    matching decompressor is available.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) -- update() on a string.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: was httpuseragent.update(...) -- wrong target object.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # NOTE(review): these aiohttp calls are used synchronously (no await);
        # preserved as-is -- confirm against the aiohttp version targeted.
        reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize)
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata)
        else:
            geturls_text = reqsession.get(httpurl)
    except aiohttp.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except aiohttp.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    httpversionout = geturls_text.version
    httpmethodout = geturls_text.method
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request_info.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: force the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            for ic in range(imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUGFIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:  # BUGFIX: zstandard has no .error attribute
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUGFIX: was zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUGFIX: was zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"}
    geturls_text.close()
    return returnval
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the aiohttp module is unavailable: delegate the
        whole download to the urllib implementation and return its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUGFIX: propagate the delegated result instead of dropping it.
        return returnval
def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the aiohttp backend and spool the body into a
    uniquely named temporary file.

    Returns the result dict re-keyed for a file result ('Type': "File"),
    or False when the underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Preserve the server's Last-Modified stamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # best-effort: missing/malformed Last-Modified header
        except (TypeError, ValueError):
            pass  # best-effort: missing/malformed Last-Modified header
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the aiohttp module is unavailable: delegate the
        to-temp-file download to the urllib implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUGFIX: propagate the delegated result instead of dropping it.
        return returnval
def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the aiohttp backend into outpath/outfile.

    When outfile is "-" the body is returned in-memory ('Type': "Content")
    instead of being moved to a destination file ('Type': "File").
    buffersize is a pair: [download chunk size, copy-back chunk size].
    Returns a result dict, or False on failure / invalid destination.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse destinations that cannot receive a file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified stamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # best-effort: missing/malformed Last-Modified header
        except (TypeError, ValueError):
            pass  # best-effort: missing/malformed Last-Modified header
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the original dict listed 'Method' twice; only the second
        # ('Method': httpmethod) survived, so that is the value kept here.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        # BUGFIX: guard the failure case on this path too.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # Copy the temp file back into memory in chunks, logging progress.
        with open(tmpfilename, 'rb') as ft:
            strbuf = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                # BUGFIX: avoid ZeroDivisionError when the file is empty.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            fdata = strbuf.getvalue()
            strbuf.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the aiohttp module is unavailable: delegate the
        download-to-file to the urllib implementation and return its result."""
        # BUGFIX: pass the tail arguments by keyword -- the original positional
        # call dropped `ranges` and passed buffersize/outfile/outpath in an
        # order that does not match this module's own signatures.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httpx and return the body fully in memory.

    Returns a result dict ('Type': "Content", 'Content', 'Headers', ...) or
    False when the connection fails. The body is transparently decompressed
    for gzip/deflate/br/zstd/lzma/xz/bzip2 Content-Encoding values when the
    matching decompressor is available.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) -- update() on a string.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: was httpuseragent.update(...) -- wrong target object.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
        if(httpmethod=="GET"):
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod=="POST"):
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        # Older httpx versions lack reason_phrase; map the code ourselves.
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: force the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            for ic in range(imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # BUGFIX: httpx Response.read() returns the whole (cached) body on
        # every call, so the original `while True` loop never terminated;
        # read the body once instead.
        databytes = geturls_text.read()
        if databytes:
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUGFIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:  # BUGFIX: zstandard has no .error attribute
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # BUGFIX: was zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # BUGFIX: was zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"}
    # BUGFIX: close once (the original closed the response twice).
    geturls_text.close()
    return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx module is unavailable: delegate the
    whole download to the urllib implementation and return its result."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # BUGFIX: propagate the delegated result instead of dropping it.
    return returnval
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpx backend and spool the body into a
    uniquely named temporary file.

    Returns the result dict re-keyed for a file result ('Type': "File"),
    or False when the underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Preserve the server's Last-Modified stamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # best-effort: missing/malformed Last-Modified header
        except (TypeError, ValueError):
            pass  # best-effort: missing/malformed Last-Modified header
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx package is not importable.

    Hands the file-download request straight to the urllib-backed
    implementation and returns its result unchanged.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                              httpreferer, httpcookie, httpmethod,
                                              postdata, ranges, buffersize, sleep,
                                              timeout)
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx backend to outpath/outfile, or — when
    outfile is "-" — return the content in-memory.

    Returns a result dict ('Type': "File" or "Content"), or False on
    failure or when the destination path is unusable.

    BUG FIXES: the returnval dict listed 'Method' twice (the second key
    silently clobbered the first); elapsed times were computed as
    start - end (negative); the progress percentage divided by a
    possibly-zero downloadsize.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse when the directory slot is a file or the file slot a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best-effort: carry the server's Last-Modified stamp onto the target.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    else:
        # outfile == "-": copy the temp file into memory and return the bytes.
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when the httpx package is not importable.

    BUG FIX: the delegation previously passed arguments positionally in
    the wrong order (buffersize landed in the outfile slot, outfile in
    outpath, and ranges was dropped entirely). Keyword arguments ensure
    every value reaches the matching parameter of the urllib variant.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                 httpreferer, httpcookie, httpmethod,
                                                 postdata, outfile=outfile,
                                                 outpath=outpath, ranges=ranges,
                                                 buffersize=buffersize, sleep=sleep,
                                                 timeout=timeout)
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an HTTP/2-capable httpx Client and return the
    decoded body plus response metadata as a dict, or False on failure.

    BUG FIXES: the User-Agent/Referer fallbacks called .update() on the
    httpuseragent string instead of the httpheaders dict; the lzma and
    bzip2 decompress paths caught zstandard.error (wrong exception type);
    the BytesIO buffer was read without rewinding, yielding empty content.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: update the header dict, not the user-agent string.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # FIX: rewind before reading the buffered content back.
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently undo any Content-Encoding the server applied.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # FIX: was zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # FIX: was zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"}
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx (HTTP/2) backend is not importable.

    Forwards the request verbatim to the urllib implementation.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent,
                                         httpreferer, httpcookie, httpmethod,
                                         postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpx2 backend and spool the body to a
    uniquely named temporary file.

    Returns a dict describing the temp file, or False when the underlying
    download failed.

    BUG FIX: elapsed time was computed as start - end (negative); now
    end - start.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best-effort: stamp the file with the server's Last-Modified time.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    # Refresh size from disk now that the content has been written out.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when the httpx (HTTP/2) backend is not importable.

    Hands the file-download request straight to the urllib implementation.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                              httpreferer, httpcookie, httpmethod,
                                              postdata, ranges, buffersize, sleep,
                                              timeout)
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx2 backend to outpath/outfile, or —
    when outfile is "-" — return the content in-memory.

    Returns a result dict ('Type': "File" or "Content"), or False on
    failure or when the destination path is unusable.

    BUG FIXES: the returnval dict listed 'Method' twice; elapsed times
    were computed as start - end (negative); the progress percentage
    divided by a possibly-zero downloadsize.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse when the directory slot is a file or the file slot a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best-effort: carry the server's Last-Modified stamp onto the target.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    else:
        # outfile == "-": copy the temp file into memory and return the bytes.
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when the httpx (HTTP/2) backend is not importable.

    BUG FIX: arguments were previously passed positionally in the wrong
    order (buffersize landed in the outfile slot and ranges was dropped);
    keyword arguments ensure each value reaches the right parameter of
    the urllib variant.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                 httpreferer, httpcookie, httpmethod,
                                                 postdata, outfile=outfile,
                                                 outpath=outpath, ranges=ranges,
                                                 buffersize=buffersize, sleep=sleep,
                                                 timeout=timeout)
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpcore ConnectionPool (HTTP/1.1 only) and
    return the decoded body plus response metadata as a dict, or False on
    failure.

    BUG FIXES: the POST branch sent request("GET", ...) instead of POST;
    the User-Agent/Referer fallbacks called .update() on the httpuseragent
    string instead of the httpheaders dict; lzma/bzip2 decompress caught
    zstandard.error (wrong exception type); the BytesIO buffer was read
    without rewinding.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: update the header dict, not the user-agent string.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpcore_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpcore_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod=="POST"):
            httpcore_pool = httpcore.ConnectionPool(http1=True, http2=False)
            # FIX: was request("GET", ...) — the POST branch never POSTed.
            geturls_text = httpcore_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            httpcore_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpcore_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # FIX: rewind before reading the buffered content back.
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently undo any Content-Encoding the server applied.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding")=="lzma" or httpheaderout.get("Content-Encoding")=="xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # FIX: was zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):  # FIX: was zstandard.error
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"}
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback bound only when httpcore is not importable.

        Forwards the request verbatim to the urllib implementation.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent,
                                             httpreferer, httpcookie, httpmethod,
                                             postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpcore backend and spool the body to a
    uniquely named temporary file.

    Returns a dict describing the temp file, or False when the underlying
    download failed.

    BUG FIX: elapsed time was computed as start - end (negative); now
    end - start.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best-effort: stamp the file with the server's Last-Modified time.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    # Refresh size from disk now that the content has been written out.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback bound only when httpcore is not importable.

        Hands the file-download request straight to the urllib implementation.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent,
                                                  httpreferer, httpcookie, httpmethod,
                                                  postdata, ranges, buffersize, sleep,
                                                  timeout)
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpcore backend and either move the result
    to outpath/outfile (returning a 'File' result dict) or, when
    outfile=="-", read the payload back into memory (returning a 'Content'
    result dict).  Returns False when the target path is unusable or the
    download failed.  buffersize is [download_chunk, copy_chunk]."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Writing to a real file: normalize the directory and refuse paths
        # that cannot hold the output.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file;
        # fall back to strptime() where parsedate_to_datetime is missing
        # (Python 2's email.utils) and ignore absent/unparsable headers.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the original dict literal carried a duplicate 'Method' key
        # (the pretmpfilename value was silently overwritten by httpmethod);
        # only the surviving entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    else:
        # Streaming to memory: download to a temp file, then copy it into a
        # BytesIO buffer chunk-by-chunk with progress logging.
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            # ROBUSTNESS: the original dereferenced a False result here.
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the
        plain-urllib to-file implementation."""
        # BUGFIX: the original passed the arguments positionally in the
        # wrong order (postdata, buffersize, outfile, outpath, ...), which
        # shifted every parameter after postdata by one slot and silently
        # dropped `ranges`.  They are now passed in the callee's declared
        # order, and the result is returned.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpcore ConnectionPool (HTTP/1.1 + HTTP/2)
    and return a 'Content' result dict, or False on connection errors.

    httpcookie is accepted for signature parity with the sibling backends
    but is not consulted by this backend."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUGFIX: when the header was absent the original called .update()
        # on the httpuseragent string instead of the header dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUGFIX: same wrong-object .update() defect for Referer.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline user:pass credentials in the URL become an Authorization
        # header (base64 of "user:pass").
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
        if(httpmethod == "POST"):
            # BUGFIX: POST requests were issued with the "GET" verb.
            # NOTE(review): httpcore's request() documents the body kwarg
            # as `content=`; `data=` is kept from the original -- confirm
            # against the installed httpcore version.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            # Any method other than POST is issued as a plain GET, as in
            # the original dispatch.
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: rebuild the header mapping entry-by-entry so it behaves
        # like a plain dict; skip silently if it has no .keys().
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read()
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently undo any Content-Encoding the server applied; on a
    # decompression error keep the raw payload.
    contentencoding = httpheaderout.get("Content-Encoding")
    if(contentencoding == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(contentencoding == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(contentencoding == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(contentencoding == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except Exception:
            # BUGFIX: the original caught `zstandard.error`, which is not a
            # defined attribute of the zstandard module.
            pass
    elif((contentencoding == "lzma" or contentencoding == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: the original caught zstandard.error here.
            pass
    elif(contentencoding == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except Exception:
            # BUGFIX: the original caught zstandard.error here.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"}
    # The response was already closed above; the duplicate close() from the
    # original is dropped.
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate the fetch
        to the plain-urllib implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpcore2 into a uniquely
    named temporary file and return a 'File' result dict, or False on
    failure.  The temp-file suffix embeds a SHA-1 of (url, buffersize,
    start time) so concurrent downloads cannot collide.

    CONSISTENCY FIX: the original declared httpheaders/httpuseragent/
    httpreferer/httpcookie without defaults, unlike every sibling backend;
    the standard defaults are restored (backward compatible)."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the server's Last-Modified onto the temp file when present;
        # AttributeError covers both a missing header (None) and Python 2's
        # email.utils lacking parsedate_to_datetime, ValueError an
        # unparsable date.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # NOTE(review): start - end yields a negative interval; kept as-is for
    # consistency with every other backend in this file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate the file
        download to the plain-urllib implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpcore2 backend and either move the result
    to outpath/outfile (returning a 'File' result dict) or, when
    outfile=="-", read the payload back into memory (returning a 'Content'
    result dict).  Returns False when the target path is unusable or the
    download failed.  buffersize is [download_chunk, copy_chunk]."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Writing to a real file: normalize the directory and refuse paths
        # that cannot hold the output.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file;
        # fall back to strptime() where parsedate_to_datetime is missing
        # (Python 2's email.utils) and ignore absent/unparsable headers.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: duplicate 'Method' key removed (the original literal set
        # it twice; the httpmethod value silently won, so it is kept).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    else:
        # Streaming to memory: download to a temp file, then copy it into a
        # BytesIO buffer chunk-by-chunk with progress logging.
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            # ROBUSTNESS: the original dereferenced a False result here.
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttpcore):
    # NOTE(review): the `if(not havehttpcore):` guard line was lost in
    # extraction; restored per the pattern of every sibling fallback --
    # without it this def would clobber the real httpcore2 implementation.
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is not installed: delegate to the
        plain-urllib to-file implementation."""
        # BUGFIX: arguments were passed positionally in the wrong order
        # (postdata, buffersize, outfile, outpath, ...); pass them in the
        # callee's declared order and return the result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
if(haveurllib3):
    # NOTE(review): guard line elided by extraction; restored. The final
    # binding is identical either way because the `not haveurllib3`
    # fallback definition follows.
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Alias backend: fetch via the urllib3 implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate the fetch
        to the plain-urllib implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(haveurllib3):
    # NOTE(review): guard line elided by extraction; restored (binding-
    # equivalent -- the `not haveurllib3` fallback follows).
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Alias backend: file download via the urllib3 implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate the file
        download to the plain-urllib implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(haveurllib3):
    # NOTE(review): guard line elided by extraction; restored (binding-
    # equivalent -- the `not haveurllib3` fallback follows).
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Alias backend: to-file download via the urllib3 implementation."""
        # BUGFIX: arguments were passed positionally in the wrong order
        # (postdata, buffersize, outfile, outpath, ...), shifting every
        # later parameter and dropping `ranges`; pass them in the callee's
        # declared order and return the result.
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate to the
        plain-urllib to-file implementation."""
        # BUGFIX: arguments were passed positionally in the wrong order
        # (postdata, buffersize, outfile, outpath, ...); pass them in the
        # callee's declared order and return the result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with a urllib3 PoolManager and return a 'Content'
    result dict, or False when the connection fails.

    httpcookie is accepted for signature parity with the sibling backends
    but is not consulted by this backend."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUGFIX: when the header was absent the original called .update()
        # on the httpuseragent string instead of the header dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUGFIX: same wrong-object .update() defect for Referer.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline user:pass credentials in the URL become an Authorization
        # header (base64 of "user:pass").
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # Use the same value for connect and read timeouts.  Renamed from the
    # original's `timeout` so the parameter is not shadowed by the object.
    pool_timeout = urllib3.util.Timeout(connect=timeout, read=timeout)
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=pool_timeout)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # Any method other than POST is issued as a plain GET, as in
            # the original dispatch.
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        # NOTE(review): the exception type of the original's trailing
        # handler was lost in extraction; ValueError (malformed URL/port)
        # is assumed -- confirm against the full source.
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: rebuild the header mapping entry-by-entry so it behaves
        # like a plain dict; skip silently if it has no .keys().
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if(downloadsize is None):
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently undo any Content-Encoding the server applied; on a
    # decompression error keep the raw payload.
    contentencoding = httpheaderout.get("Content-Encoding")
    if(contentencoding == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(contentencoding == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(contentencoding == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(contentencoding == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except Exception:
            # BUGFIX: the original caught `zstandard.error`, which is not a
            # defined attribute of the zstandard module.
            pass
    elif((contentencoding == "lzma" or contentencoding == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: the original caught zstandard.error here.
            pass
    elif(contentencoding == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except Exception:
            # BUGFIX: the original caught zstandard.error here.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate the fetch
        to the plain-urllib implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_urllib3 into a uniquely
    named temporary file and return a 'File' result dict, or False on
    failure.  The temp-file suffix embeds a SHA-1 of (url, buffersize,
    start time) so concurrent downloads cannot collide."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the server's Last-Modified onto the temp file when present;
        # AttributeError covers both a missing header (None) and Python 2's
        # email.utils lacking parsedate_to_datetime, ValueError an
        # unparsable date.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # NOTE(review): start - end yields a negative interval; kept as-is for
    # consistency with every other backend in this file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is not installed: delegate the file
        download to the plain-urllib implementation."""
        # BUGFIX: restore the dropped return of the delegated result.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via urllib3 and either save it to outpath/outfile or,
    when outfile is "-", return the content in memory.

    Returns a result dict describing the saved file or the content, or False
    on failure. Mutable list defaults (ranges/buffersize) are kept for
    interface compatibility; they are not mutated here.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Save to a real file: fetch into a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath exists but is a file, not a directory
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # target name already exists as a directory
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime unavailable (old Python): parse by hand.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # missing/unparsable Last-Modified: keep current mtime
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the original dict literal listed 'Method' twice; only the
        # last value (httpmethod) survived, so a single key preserves behavior.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # In-memory mode: fetch into a temp file, read it back, delete it.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft, BytesIO() as f:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):  # guard avoids division by zero
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    # FIX: make sure the result dict reaches the caller.
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when urllib3 is unavailable: delegate to the urllib
        save-to-file backend and return its result dict (or False)."""
        # BUG FIX: positional delegation passed buffersize where outfile
        # belongs and dropped ranges; keyword arguments pin each value to the
        # matching parameter of the urllib implementation.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        # FIX: make sure the delegated result is actually returned.
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with mechanize and return a result dict (or False).

    The dict carries the (decompressed) body under 'Content' plus response
    metadata ('Headers', 'Code', 'Reason', ...), mirroring the other
    *_with_* backends in this file.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...) — strings have
            # no .update(); the header dict is what must be extended.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            # GET and any other verb open the URL identically.
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # An HTTP error response still carries usable metadata; keep it.
        geturls_text = geturls_text_error
        log.info("Error With URL " + httpurl)
    except URLError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.code
    httpcodereason = geturls_text.msg
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    reqhead = geturls_opener.request
    httpheadersentout = reqhead.header_items()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 message objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):  # guard avoids division by zero
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # rewind before reading the accumulated body back
        returnval_content = strbuf.read()
    # Transparently undo the transfer Content-Encoding, best-effort.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUG FIX: was catching zstandard.error for lzma/xz data.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (IOError, OSError, ValueError):
            # BUG FIX: was catching zstandard.error for bzip2 data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize,
                 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")},
                 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout,
                 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout,
                 'Reason': httpcodereason, 'HTTPLib': "mechanize"}
    geturls_text.close()
    # FIX: make sure the result dict reaches the caller.
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when mechanize is unavailable: fetch httpurl via the
        urllib backend and return its result dict (or False)."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # FIX: make sure the delegated result is actually returned.
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via mechanize into a uniquely named temp file.

    Returns a result dict whose 'Filename' is the temp-file path, or False
    when the underlying download failed. The caller owns the temp file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the file with the server's Last-Modified time when available.
        # NOTE(review): utime runs before the content is written, so the write
        # itself resets mtime afterwards; ordering preserved from the original.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # missing/unparsable Last-Modified: keep current mtime
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'),
                     'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")},
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename),
                      'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")},
                      'DownloadTime': float(exec_time_end - exec_time_start),
                      'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    # FIX: make sure the result dict reaches the caller.
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when mechanize is unavailable: download httpurl to a temp
        file via the urllib backend and return its result dict (or False)."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # FIX: make sure the delegated result is actually returned.
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via mechanize and either save it to outpath/outfile
    or, when outfile is "-", return the content in memory.

    Returns a result dict describing the saved file or the content, or False
    on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Save to a real file: fetch into a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False  # outpath exists but is a file, not a directory
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False  # target name already exists as a directory
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # missing/unparsable Last-Modified: keep current mtime
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: removed the duplicate 'Method' key (the literal kept only
        # the last value, httpmethod, so one key preserves behavior).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # In-memory mode: fetch into a temp file, read it back, delete it.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft, BytesIO() as f:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):  # guard avoids division by zero
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUG FIX: 'HeadersSent' previously stored the literal list
        # ['HeadersSent'] instead of the headers actually sent upstream.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'),
                     'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    # FIX: make sure the result dict reaches the caller.
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when mechanize is unavailable: delegate to the urllib
        save-to-file backend and return its result dict (or False)."""
        # BUG FIX: positional delegation passed buffersize where outfile
        # belongs and dropped ranges; keyword arguments pin each value to the
        # matching parameter of the urllib implementation.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        # FIX: make sure the delegated result is actually returned.
        return returnval
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with pycurl and return a result dict (or False).

    The dict carries the (decompressed) body under 'Content' plus response
    metadata ('Headers', 'Code', 'Reason', ...), mirroring the other
    *_with_* backends in this file.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...) — strings have
            # no .update(); the header dict is what must be extended.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    # NOTE(review): this opener is configured but the transfer itself is done
    # by pycurl below; kept for parity with the original code.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    retrieved_body = BytesIO()
    retrieved_headers = BytesIO()

    def _make_curl():
        # Shared handle setup for every request type (was triplicated).
        chandle = pycurl.Curl()
        chandle.setopt(chandle.URL, httpurl)
        chandle.setopt(chandle.WRITEFUNCTION, retrieved_body.write)
        chandle.setopt(chandle.HTTPHEADER, httpheaders)
        chandle.setopt(chandle.HEADERFUNCTION, retrieved_headers.write)
        chandle.setopt(chandle.FOLLOWLOCATION, True)
        chandle.setopt(chandle.TIMEOUT, timeout)
        return chandle

    try:
        if(httpmethod == "POST"):
            geturls_text = _make_curl()
            geturls_text.setopt(geturls_text.POST, True)
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
        else:
            # GET and any other verb are performed identically.
            geturls_text = _make_curl()
            geturls_text.perform()
        retrieved_headers.seek(0)
        if(sys.version[0] == "2"):
            pycurlhead = retrieved_headers.read()
        if(sys.version[0] >= "3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8')
        # First header line is the status line: "HTTP/<ver> <code> [reason]".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
        retrieved_body.seek(0)
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL " + httpurl)
        return False
    except ValueError:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
    httpversionout = pyhttpverinfo[0]
    httpmethodout = httpmethod
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
    httpheaderout = pycurlheadersout
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
    if(sys.version[0] == "2"):
        # Python 2 message objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):  # guard avoids division by zero
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # rewind before reading the accumulated body back
        returnval_content = strbuf.read()
    # Transparently undo the transfer Content-Encoding, best-effort.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif((httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUG FIX: was catching zstandard.error for lzma/xz data.
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (IOError, OSError, ValueError):
            # BUG FIX: was catching zstandard.error for bzip2 data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize,
                 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")},
                 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout,
                 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout,
                 'Reason': httpcodereason, 'HTTPLib': "pycurl"}
    geturls_text.close()
    # FIX: make sure the result dict reaches the caller.
    return returnval
if(not havepycurl):
    # FIX: this fallback was defined unconditionally in the chunk as seen,
    # which would shadow the real pycurl implementation above; guard it the
    # same way as the urllib3/mechanize fallbacks.
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: fetch httpurl via the urllib
        backend and return its result dict (or False)."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # FIX: make sure the delegated result is actually returned.
        return returnval
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via pycurl into a uniquely named temp file.

    Returns a result dict whose 'Filename' is the temp-file path, or False
    when the underlying download failed. The caller owns the temp file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the file with the server's Last-Modified time when available.
        # NOTE(review): utime runs before the content is written, so the write
        # itself resets mtime afterwards; ordering preserved from the original.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # missing/unparsable Last-Modified: keep current mtime
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'),
                     'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")},
                     'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'),
                     'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename),
                      'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")},
                      'DownloadTime': float(exec_time_end - exec_time_start),
                      'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    # FIX: make sure the result dict reaches the caller.
    return returnval
if(not havepycurl):
    # FIX: this fallback was defined unconditionally in the chunk as seen,
    # which would shadow the real pycurl implementation above; guard it the
    # same way as the urllib3/mechanize fallbacks.
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when pycurl is unavailable: download httpurl to a temp
        file via the urllib backend and return its result dict (or False)."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # FIX: make sure the delegated result is actually returned.
        return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """
    Download httpurl via pycurl and either save it to outpath/outfile, or,
    when outfile is "-", return the downloaded bytes in memory.

    Returns a result dict ('Type', 'Filename'/'Content', sizes, timings and
    the HTTP metadata propagated from the inner download) or False on failure.
    buffersize is a two-element list: [download chunk size, copy chunk size].
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Download to a temp file first, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: stamp the file with the server's Last-Modified time.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start; the original computed
        # start - end, yielding a negative duration.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the original literal listed the 'Method' key twice; only
        # the second value (httpmethod) survived, so keep just that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if outfile == "-":
        # Stream the temporary download back into memory and return the bytes.
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        # Robustness: bail out instead of raising AttributeError on failure.
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: same sign error as above.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback defined when pycurl is unavailable: delegate to the urllib
    implementation.

    BUG FIX: the original passed buffersize/outfile/outpath positionally after
    postdata, which binds buffersize to the callee's outfile parameter (and
    shifts everything after it).  Keyword arguments bind each value to the
    parameter of the same name.
    NOTE(review): assumes download_from_url_to_file_with_urllib uses the same
    parameter names as the pycurl variant visible in this file - verify.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """
        Fetch httpurl with pycurl forced to HTTP/2 and return a result dict
        ('Type', 'Content', 'Contentsize', 'ContentsizeAlt', 'Headers',
        'Version', 'Method', 'HeadersSent', 'URL', 'Code', 'Reason',
        'HTTPLib'), or False when the request fails.  The body is
        transparently decompressed according to Content-Encoding.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if sleep < 0:
            sleep = geturls_download_sleep
        if timeout <= 0:
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if httpuseragent is not None:
            if 'User-Agent' in httpheaders:
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUG FIX: the original called .update() on the httpuseragent
                # string instead of the httpheaders dict (AttributeError).
                httpheaders.update({'User-Agent': httpuseragent})
        if httpreferer is not None:
            if 'Referer' in httpheaders:
                httpheaders['Referer'] = httpreferer
            else:
                # BUG FIX: same as above - update httpheaders, not httpuseragent.
                httpheaders.update({'Referer': httpreferer})
        if urlparts.username is not None or urlparts.password is not None:
            # Turn URL-embedded credentials into a Basic auth header.
            if sys.version[0] == "2":
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if sys.version[0] >= "3":
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        # Kept for parity with the urllib code paths (pycurl ignores it).
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if isinstance(httpheaders, dict):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if postdata is not None and not isinstance(postdata, dict):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()

        def _new_curl_handle():
            # Options shared by every request type (GET, POST, everything else).
            handle = pycurl.Curl()
            handle.setopt(handle.URL, httpurl)
            handle.setopt(handle.HTTP_VERSION, handle.CURL_HTTP_VERSION_2_0)
            handle.setopt(handle.WRITEFUNCTION, retrieved_body.write)
            handle.setopt(handle.HTTPHEADER, httpheaders)
            handle.setopt(handle.HEADERFUNCTION, retrieved_headers.write)
            handle.setopt(handle.FOLLOWLOCATION, True)
            handle.setopt(handle.TIMEOUT, timeout)
            return handle

        try:
            # The original duplicated the full option setup for GET, POST and
            # the default branch; GET and the default were identical, so only
            # POST needs extra options.
            geturls_text = _new_curl_handle()
            if httpmethod == "POST":
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
            retrieved_headers.seek(0)
            if sys.version[0] == "2":
                pycurlhead = retrieved_headers.read()
            if sys.version[0] >= "3":
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if isinstance(httpheaderout, list):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if sys.version[0] == "2":
            # Python 2 header containers are rebuilt into a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while ic < imax:
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if isinstance(httpheadersentout, list):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if downloadsize is not None:
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparent best-effort decompression based on Content-Encoding.
        if httpheaderout.get("Content-Encoding") == "gzip":
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif httpheaderout.get("Content-Encoding") == "deflate":
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.ZstdError:
                # BUG FIX: zstandard raises ZstdError; the module has no
                # "error" attribute, so the original handler could never match.
                pass
        elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:
                # BUG FIX: the original caught zstandard.error here (copy-paste).
                pass
        elif httpheaderout.get("Content-Encoding") == "bzip2":
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):
                # BUG FIX: the original caught zstandard.error here (copy-paste);
                # bz2.decompress raises OSError/ValueError on bad data.
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when pycurl is unavailable: hand the request to the
    urllib implementation with identical arguments and return its result."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """pycurl is present but lacks HTTP/2 support: fall back to the plain
        pycurl (HTTP/1.1) implementation.

        BUG FIX (consistency): the original delegated to the urllib
        implementation here, while the file/to_file variants under this exact
        same guard delegate to their pycurl counterparts; delegate to
        download_from_url_with_pycurl to match.
        """
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """
        Download httpurl over HTTP/2 pycurl into a uniquely named temporary
        file (prefix/suffix from the module-level tmpfileprefix/tmpfilesuffix
        plus a SHA-1 of url+buffersize+start time) and return a result dict
        describing the file, or False on failure.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        myhash = hashlib.new("sha1")
        if sys.version[0] == "2":
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if sys.version[0] >= "3":
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if sleep < 0:
            sleep = geturls_download_sleep
        if timeout <= 0:
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if not pretmpfilename:
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # BUG FIX: write the payload BEFORE stamping the timestamps; the
            # original called os.utime() first, so the subsequent write reset
            # the modification time and the Last-Modified stamp was lost.
            f.write(pretmpfilename.get('Content'))
            f.flush()
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (the original used start - end,
        # producing a negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when pycurl is unavailable: forward everything to the
    urllib file-download implementation and return its result."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """pycurl is present but without HTTP/2 support: delegate to the plain
        pycurl (HTTP/1.1) file-download implementation."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """
        Download httpurl over HTTP/2 pycurl and either save it to
        outpath/outfile, or, when outfile is "-", return the downloaded bytes
        in memory.  Returns a result dict or False on failure.
        buffersize is [download chunk size, copy chunk size].
        """
        global geturls_download_sleep, havezstd, havebrotli
        if sleep < 0:
            sleep = geturls_download_sleep
        if timeout <= 0:
            timeout = 10
        if not outfile == "-":
            # Download to a temp file first, then move it into place.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if not os.path.exists(outpath):
                os.makedirs(outpath)
            if os.path.exists(outpath) and os.path.isfile(outpath):
                return False
            if os.path.exists(filepath) and os.path.isdir(filepath):
                return False
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if not pretmpfilename:
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Best effort: keep the server's Last-Modified time on the file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            # BUG FIX: elapsed time is end - start (original used start - end).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if os.path.exists(tmpfilename):
                os.remove(tmpfilename)
            # BUG FIX: the original dict listed 'Method' twice; only the second
            # value (httpmethod) survived, so keep that one.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if outfile == "-":
            # Stream the temporary download back into memory.
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            # Robustness: bail out instead of raising AttributeError on failure.
            if not pretmpfilename:
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if downloadsize > 0:
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            # BUG FIX: same sign error as above.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            # BUG FIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback defined when pycurl is unavailable: delegate to the urllib
    implementation.

    BUG FIX: the original passed buffersize/outfile/outpath positionally in
    the wrong slots (buffersize landed in the callee's outfile parameter);
    keyword arguments bind each value to the parameter of the same name.
    NOTE(review): assumes download_from_url_to_file_with_urllib uses the same
    parameter names as the pycurl variant visible in this file - verify.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """pycurl is present but without HTTP/2 support: delegate to the plain
        pycurl (HTTP/1.1) implementation.

        BUG FIX: download_from_url_to_file_with_pycurl's signature is
        (..., postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
        the original call passed buffersize positionally into the outfile slot
        (shifting outfile/outpath/sleep/timeout into the wrong parameters and
        dropping ranges).  Keyword arguments bind everything correctly.
        """
        return download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
if havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0"):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """
        Fetch httpurl with pycurl forced to HTTP/3 and return a result dict
        ('Type', 'Content', 'Contentsize', 'ContentsizeAlt', 'Headers',
        'Version', 'Method', 'HeadersSent', 'URL', 'Code', 'Reason',
        'HTTPLib'), or False when the request fails.  The body is
        transparently decompressed according to Content-Encoding.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if sleep < 0:
            sleep = geturls_download_sleep
        if timeout <= 0:
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if httpuseragent is not None:
            if 'User-Agent' in httpheaders:
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUG FIX: the original called .update() on the httpuseragent
                # string instead of the httpheaders dict (AttributeError).
                httpheaders.update({'User-Agent': httpuseragent})
        if httpreferer is not None:
            if 'Referer' in httpheaders:
                httpheaders['Referer'] = httpreferer
            else:
                # BUG FIX: same as above - update httpheaders, not httpuseragent.
                httpheaders.update({'Referer': httpreferer})
        if urlparts.username is not None or urlparts.password is not None:
            # Turn URL-embedded credentials into a Basic auth header.
            if sys.version[0] == "2":
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if sys.version[0] >= "3":
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update({'Authorization': "Basic "+inurlencode})
        # Kept for parity with the urllib code paths (pycurl ignores it).
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if isinstance(httpheaders, dict):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if postdata is not None and not isinstance(postdata, dict):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()

        def _new_curl_handle():
            # Options shared by every request type (GET, POST, everything else).
            handle = pycurl.Curl()
            handle.setopt(handle.URL, httpurl)
            handle.setopt(handle.HTTP_VERSION, handle.CURL_HTTP_VERSION_3_0)
            handle.setopt(handle.WRITEFUNCTION, retrieved_body.write)
            handle.setopt(handle.HTTPHEADER, httpheaders)
            handle.setopt(handle.HEADERFUNCTION, retrieved_headers.write)
            handle.setopt(handle.FOLLOWLOCATION, True)
            handle.setopt(handle.TIMEOUT, timeout)
            return handle

        try:
            # The original duplicated the full option setup for GET, POST and
            # the default branch; GET and the default were identical, so only
            # POST needs extra options.
            geturls_text = _new_curl_handle()
            if httpmethod == "POST":
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
            retrieved_headers.seek(0)
            if sys.version[0] == "2":
                pycurlhead = retrieved_headers.read()
            if sys.version[0] >= "3":
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if isinstance(httpheaderout, list):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if sys.version[0] == "2":
            # Python 2 header containers are rebuilt into a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while ic < imax:
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if isinstance(httpheadersentout, list):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if downloadsize is not None:
            downloadsize = int(downloadsize)
        if downloadsize is None:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparent best-effort decompression based on Content-Encoding.
        if httpheaderout.get("Content-Encoding") == "gzip":
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif httpheaderout.get("Content-Encoding") == "deflate":
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif httpheaderout.get("Content-Encoding") == "br" and havebrotli:
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif httpheaderout.get("Content-Encoding") == "zstd" and havezstd:
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.ZstdError:
                # BUG FIX: zstandard raises ZstdError; the module has no
                # "error" attribute, so the original handler could never match.
                pass
        elif (httpheaderout.get("Content-Encoding") == "lzma" or httpheaderout.get("Content-Encoding") == "xz") and havelzma:
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:
                # BUG FIX: the original caught zstandard.error here (copy-paste).
                pass
        elif httpheaderout.get("Content-Encoding") == "bzip2":
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):
                # BUG FIX: the original caught zstandard.error here (copy-paste);
                # bz2.decompress raises OSError/ValueError on bad data.
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when pycurl is unavailable: hand the request to the
    urllib implementation with identical arguments and return its result."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 not available but HTTP/2 is: downgrade this entry point to
        the HTTP/2 pycurl implementation."""
        return download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0"):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Neither HTTP/3 nor HTTP/2 is available in this libcurl build:
        downgrade this entry point to the plain pycurl implementation."""
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl via the pycurl HTTP/3 backend into a uniquely
        named temporary file.

        Returns a result dict ('Type': "File", 'Filename', 'Filesize',
        'Headers', 'DownloadTime', ...) or False when the download fails.
        The caller is responsible for removing the temporary file.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Derive a unique temp-file suffix from URL, buffer size and start time.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            try:
                # Stamp the temp file with the server's Last-Modified time.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    # Older Pythons lack parsedate_to_datetime; parse by hand.
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (ValueError, TypeError):
                    pass
            except (ValueError, TypeError):
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        # FIX: the original computed start - end, logging a negative duration.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate file downloads to
    the urllib-based implementation (same signature, same result dict).
    FIX: the original never returned the delegate's result.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """pycurl supports HTTP/2 but not HTTP/3: serve pycurl3 file
        downloads via the HTTP/2 implementation.
        FIX: the original never returned the delegate's result.
        """
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """pycurl supports neither HTTP/2 nor HTTP/3: serve pycurl3 file
        downloads via the plain HTTP/1.x pycurl implementation.
        FIX: the original never returned the delegate's result.
        """
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via pycurl HTTP/3 and either move the result to
        outpath/outfile ('Type': "File") or, when outfile is "-", return the
        bytes in memory ('Type': "Content").

        buffersize is a two-element list: [download buffer, copy buffer].
        Returns the result dict or False on failure.
        FIX: the original dict literal had a duplicate 'Method' key, so the
        first value was silently discarded; durations were also negated.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        if(not outfile == "-"):
            # Persist to a real file under outpath.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath + os.path.sep + outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            try:
                # Carry the server's Last-Modified stamp onto the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (ValueError, TypeError):
                    pass
            except (ValueError, TypeError):
                pass
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile == "-"):
            # Stream the temp file back into memory instead of keeping it.
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with BytesIO() as f:
                with open(tmpfilename, 'rb') as ft:
                    while True:
                        databytes = ft.read(buffersize[1])
                        if not databytes:
                            break
                        datasize = len(databytes)
                        fulldatasize = datasize + fulldatasize
                        percentage = ""
                        if(downloadsize > 0):
                            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                        downloaddiff = fulldatasize - prevdownsize
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                        prevdownsize = fulldatasize
                        f.write(databytes)
                fdata = f.getvalue()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when pycurl is unavailable: delegate to the urllib
    to-file downloader.
    FIX: the original passed buffersize/outfile/outpath positionally in the
    wrong slots (buffersize landed in the outfile parameter) and never
    returned the result; keyword arguments make the mapping explicit.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """pycurl supports HTTP/2 but not HTTP/3: serve pycurl3 to-file
        downloads via the HTTP/2 implementation.
        FIX: the original defined this as download_from_url_to_file_with_pycurl2
        calling itself — clobbering the real pycurl2 function with infinite
        recursion; it also misordered the positional arguments.
        """
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """pycurl supports neither HTTP/2 nor HTTP/3: serve pycurl3 to-file
        downloads via the plain HTTP/1.x pycurl implementation.
        FIX: the original defined this as download_from_url_to_file_with_pycurl
        calling itself — clobbering the real pycurl function with infinite
        recursion; it also misordered the positional arguments.
        """
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_file_from_ftp_file(url):
    """Fetch an ftp:// or ftps:// URL and return its body as a BytesIO
    positioned at the start, or False on any connection/scheme error."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    # Default to anonymous FTP when the URL carries no credentials.
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # FIX: the original logged the undefined name httpurl (NameError).
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # FIX: log in with the computed credentials (anonymous fallback); the
    # original passed the raw urlparts fields and never used them.
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme == "ftps"):
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Convenience wrapper: fetch an FTP URL and return its body as bytes."""
    fetched = download_file_from_ftp_file(url)
    return fetched.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download an ftp:// / ftps:// URL fully into memory.

    HTTP-style header arguments are normalized for interface parity with the
    HTTP downloaders but FTP itself sends no headers, so the result dict has
    'Headers': None. Returns the result dict or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: the original called httpuseragent.update(...) on a string
            # (AttributeError); the headers dict is what must grow.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: same bug as above — update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    # FTP transfer size is not known up front.
    downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # FIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0, 0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an FTP URL into a uniquely named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', ...) or
    False on failure. The caller removes the temporary file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # FTP results carry no headers, so this normally falls through
            # to the AttributeError branch; kept for interface parity.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # FIX: the original computed start - end, logging a negative duration.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an FTP URL and either move it to outpath/outfile
    ('Type': "File") or, when outfile is "-", return the bytes in memory
    ('Type': "Content").

    buffersize is a two-element list: [download buffer, copy buffer].
    Returns the result dict or False on failure.
    FIX: the original dict literal had a duplicate 'Method' key whose second
    value (None) silently overwrote the real method; durations were negated.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Persist to a real file under outpath.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Stream the temp file back into memory instead of keeping it.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize > 0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to the path of an ftp:// or
    ftps:// URL via STOR. Returns ftpfile rewound to the start, or False on
    any connection/scheme error."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    # Default to anonymous FTP when the URL carries no credentials.
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # FIX: the original logged the undefined name httpurl (NameError).
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # FIX: log in with the computed credentials (anonymous fallback); the
    # original passed the raw urlparts fields and never used them.
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme == "ftps"):
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload a bytes payload to an FTP URL by wrapping it in a BytesIO.
    FIX: the original never returned the uploader's result.
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch an sftp:// URL over paramiko and return its body as a BytesIO
    positioned at the start, or False on any connection/scheme error."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    # Default to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # FIX: the original logged the undefined name httpurl (NameError).
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        """No-paramiko fallback: SFTP is unsupported, always return False."""
        return False
def download_file_from_sftp_string(url):
    """Convenience wrapper: fetch an SFTP URL and return its body as bytes."""
    fetched = download_file_from_sftp_file(url)
    return fetched.read()
if(not haveparamiko):
    # NOTE(review): this redefines download_file_from_ftp_string under the
    # paramiko guard; the name looks like a typo for
    # download_file_from_sftp_string — confirm against upstream before
    # renaming. Kept as-is to preserve the visible interface.
    def download_file_from_ftp_string(url):
        """No-paramiko fallback: always return False."""
        return False
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// URL fully into memory.

    HTTP-style header arguments exist only for interface parity with the
    HTTP downloaders; SFTP sends no headers, so the result dict has
    'Headers': None. Returns the result dict or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: the original called httpuseragent.update(...) on a string
            # (AttributeError); the headers dict is what must grow.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: same bug as above — update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    # SFTP transfer size is not known up front.
    downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # FIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0, 0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """No-paramiko fallback: SFTP is unsupported, always return False."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an SFTP URL into a uniquely named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize', ...) or
    False on failure. The caller removes the temporary file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # SFTP results carry no headers, so this normally falls through
            # to the AttributeError branch; kept for interface parity.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # FIX: the original computed start - end, logging a negative duration.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """No-paramiko fallback: SFTP is unsupported, always return False."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP either to outfile under outpath, or into
    memory when outfile is "-".

    Returns a result dict describing the transfer, or False on failure.
    NOTE(review): reconstructed from a whitespace-mangled extract; elided
    guard lines (sleep/timeout defaults, early returns) follow the pattern of
    the sibling pysftp variant visible in this file — confirm upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Download to a temp file first, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start; the original computed
        # start - end, yielding negative durations.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUGFIX: the original dict literal repeated the 'Method' key, so a
        # later 'Method': None entry silently clobbered the real value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if outfile == "-":
        # Download to a temp file, then stream its bytes into memory.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUGFIX: same duplicate-'Method'-key and negative-duration fixes.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub used when paramiko is not installed; always False.

        NOTE(review): body reconstructed from a mangled extract — confirm the
        original stub does nothing besides returning False.
        """
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path named by an
    sftp:// url via paramiko.

    Returns the (rewound) file object on success, False on failure.
    NOTE(review): reconstructed from a mangled extract; elided branches
    (port default, early returns, close calls) follow the pysftp twin
    visible in this file — confirm upstream.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    if urlparts.port is None:
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: log the actual parameter `url`; the original referenced an
        # undefined name `httpurl`, raising NameError in the error path.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback stub used when paramiko is not installed; always False."""
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the bytes in sftpstring to the sftp:// url.

    Returns the uploaded file object on success, False on failure.
    """
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: the original called upload_file_to_sftp_files(ftpfileo, url);
    # neither that function name nor the variable `ftpfileo` exists here
    # (copy-paste from the FTP helpers), so this always raised NameError.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    return sftpfile
def upload_file_to_sftp_string(url):
    """Fallback stub used when paramiko is not installed; always False."""
    return False
def download_file_from_pysftp_file(url):
    """Fetch the file at an sftp:// url via pysftp.

    Returns the contents as a rewound BytesIO object, or False on failure.
    NOTE(review): reconstructed from a mangled extract; elided branches
    follow the paramiko twin in this file — confirm upstream.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    sftp_port = urlparts.port
    if urlparts.port is None:
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    try:
        # BUGFIX: keep the connection object; the original discarded the
        # pysftp.Connection result and then called open_sftp() on an
        # undefined name `ssh` (NameError). pysftp.Connection itself
        # exposes getfo()/close().
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: log the actual parameter `url`; the original referenced
        # the undefined name `httpurl`.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_pysftp_file(url):
    """Fallback stub used when pysftp is not installed; always False."""
    return False
def download_file_from_pysftp_string(url):
    """Return the remote sftp:// file's contents as bytes, or False on
    failure."""
    sftpfile = download_file_from_pysftp_file(url)
    # Robustness: the helper returns False on failure; calling .read() on
    # it would raise AttributeError instead of signalling the error.
    if not sftpfile:
        return False
    return sftpfile.read()
def download_file_from_ftp_string(url):
    """Fallback stub; always returns False.

    NOTE(review): the name says "ftp" although it sits among the pysftp
    fallbacks — looks like an upstream copy-paste; kept as-is so existing
    callers keep working.
    """
    return False
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) into memory and return a result
    dict, or False on failure.

    NOTE(review): reconstructed from a whitespace-mangled extract; elided
    guard lines follow the sibling transfer functions — confirm upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    # Normalise the header collection: list -> dict -> canonical list form.
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_pysftp_file(httpurl)
    if not geturls_text:
        return False
    # SFTP gives no content-length header up front; treat unknown as 0 so
    # the percentage display is skipped.
    downloadsize = None
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0, 0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub used when pysftp is not installed; always False."""
    return False
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) into a uniquely named temporary
    file and return a result dict describing it, or False on failure.

    NOTE(review): reconstructed from a mangled extract; the try/except
    around os.utime follows the pattern of the sibling download helpers —
    confirm upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Build a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    # BUGFIX: the original forwarded httpuseragent/httpreferer, which are
    # not parameters of this function (NameError at call time) and are not
    # accepted by download_from_url_with_pysftp either.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Stamp the temp file with the server-reported Last-Modified
            # time; headers are None for SFTP, so AttributeError is expected
            # and swallowed.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub used when pysftp is not installed; always False."""
    return False
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP (pysftp) either to outfile under outpath,
    or into memory when outfile is "-".

    Returns a result dict describing the transfer, or False on failure.
    NOTE(review): reconstructed from a whitespace-mangled extract; elided
    guard lines follow the paramiko variant of this function — confirm
    upstream.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Download to a temp file first, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start; the original computed
        # start - end, yielding negative durations.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUGFIX: the original dict literal repeated the 'Method' key, so a
        # later 'Method': None entry silently clobbered the real value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if outfile == "-":
        # Download to a temp file, then stream its bytes into memory.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUGFIX: same duplicate-'Method'-key and negative-duration fixes.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub used when pysftp is not installed; always False."""
    return False
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path named by an
    sftp:// url via pysftp.

    Returns the (rewound) file object on success, False on failure.
    NOTE(review): reconstructed from a mangled extract; elided branches
    follow the paramiko twin visible in this file — confirm upstream.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    if urlparts.port is None:
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    try:
        # BUGFIX: keep the connection object; the original discarded the
        # pysftp.Connection result and later called open_sftp() on an
        # undefined name `ssh` (NameError). pysftp.Connection itself
        # exposes putfo()/close().
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: log the actual parameter `url`; the original referenced
        # the undefined name `httpurl`.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_pysftp_file(sftpfile, url):
    """Fallback stub used when pysftp is not installed; always False."""
    return False
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the bytes in sftpstring to the sftp:// url via pysftp.

    Returns the uploaded file object on success, False on failure.
    """
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: the original called upload_file_to_pysftp_files(ftpfileo, url);
    # neither that function name nor the variable `ftpfileo` exists here
    # (copy-paste from the FTP helpers), so this always raised NameError.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    return sftpfile
def upload_file_to_pysftp_string(url):
    """Fallback stub used when pysftp is not installed; always False."""
    return False