4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
62 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
102 if(sys
.version
[0]=="2"):
104 from io
import StringIO
, BytesIO
;
107 from cStringIO
import StringIO
;
108 from cStringIO
import StringIO
as BytesIO
;
110 from StringIO
import StringIO
;
111 from StringIO
import StringIO
as BytesIO
;
112 # From http://python-future.org/compatible_idioms.html
113 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
114 from urllib
import urlencode
;
115 from urllib
import urlopen
as urlopenalt
;
116 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
117 import urlparse
, cookielib
;
118 from httplib
import HTTPConnection
, HTTPSConnection
;
119 if(sys
.version
[0]>="3"):
120 from io
import StringIO
, BytesIO
;
121 # From http://python-future.org/compatible_idioms.html
122 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
123 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
124 from urllib
.error
import HTTPError
, URLError
;
125 import urllib
.parse
as urlparse
;
126 import http
.cookiejar
as cookielib
;
127 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Program identity / version metadata.
__program_name__ = "PyWWW-Get"
__program_alt_name__ = "PyWWWGet"
__program_small_name__ = "wwwget"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, patch, pre-release tag, pre-release number)
__version_info__ = (2, 0, 2, "RC 1", 1)
# (year, month, day, pre-release tag, pre-release number)
__version_date_info__ = (2023, 10, 5, "RC 1", 1)
# Zero-padded "YYYY.MM.DD" release-date string.
__version_date__ = "{0}.{1}.{2}".format(__version_date_info__[0], str(__version_date_info__[1]).zfill(2), str(__version_date_info__[2]).zfill(2))
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"
if __version_info__[4] is not None:
    # NOTE(review): the guard tests __version_info__[4] but the suffix comes
    # from __version_date_info__[4] — preserved as-is; confirm intent.
    __version_date_plusrc__ = __version_date__ + "-" + str(__version_date_info__[4])
else:
    __version_date_plusrc__ = __version_date__
if __version_info__[3] is not None:
    # e.g. "2.0.2 RC 1"
    __version__ = "{0}.{1}.{2} {3}".format(__version_info__[0], __version_info__[1], __version_info__[2], __version_info__[3])
else:
    __version__ = "{0}.{1}.{2}".format(__version_info__[0], __version_info__[1], __version_info__[2])
# Prefix for temporary download files, e.g. "py3wwwget2-".
tmpfileprefix = "py{0}{1}{2}-".format(sys.version_info[0], __program_small_name__, __version_info__[0])
# System temporary directory used for those files.
pytempdir = tempfile.gettempdir()

# NOTE(review): platform.architecture() returns a tuple such as
# ('64bit', ''), so the "32bit"/"64bit" string comparisons that follow
# this span can never match — confirm and normalize upstream.
PyBitness = platform.architecture()
153 if(PyBitness
=="32bit" or PyBitness
=="32"):
155 elif(PyBitness
=="64bit" or PyBitness
=="64"):
160 compression_supported_list
= ['gzip', 'deflate', 'bzip2'];
162 compression_supported_list
.append('br');
164 compression_supported_list
.append('zstd');
166 compression_supported_list
.append('lzma');
# Accept-Encoding value advertising every codec this build can decode.
compression_supported = ', '.join(compression_supported_list)
# Shared cookie jar used by all HTTP download helpers.
geturls_cj = cookielib.CookieJar()

# Windows platform fragments for User-Agent strings, plus the matching
# Client Hints (Sec-CH-UA-*) header values for each Windows release.
# BUGFIX: the original dicts listed 'SEC-CH-UA-PLATFORM' twice, so the
# platform name "Windows" was silently overwritten by the version
# number; the second occurrence is now 'SEC-CH-UA-PLATFORM-VERSION',
# the header the UA Client Hints spec defines for the OS version.
windowsNT4_ua_string = "Windows NT 4.0"
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"}
windows2k_ua_string = "Windows NT 5.0"
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"}
windowsXP_ua_string = "Windows NT 5.1"
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"}
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64"
# BUGFIX: XP x64 is NT 5.2; the original said "5.1.0" here (copy-paste slip).
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"}
windows7_ua_string = "Windows NT 6.1; Win64; x64"
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"}
windows8_ua_string = "Windows NT 6.2; Win64; x64"
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"}
windows81_ua_string = "Windows NT 6.3; Win64; x64"
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"}
windows10_ua_string = "Windows NT 10.0; Win64; x64"
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"}
windows11_ua_string = "Windows NT 11.0; Win64; x64"
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"}
# Full browser User-Agent strings; each impersonates a browser running
# on the Windows 7 x64 platform fragment defined above.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ({0}; rv:109.0) Gecko/20100101 Firefox/117.0".format(windows7_ua_string)
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ({0}; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17".format(windows7_ua_string)
geturls_ua_chrome_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36".format(windows7_ua_string)
geturls_ua_chromium_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36".format(windows7_ua_string)
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ({0}; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1".format(windows7_ua_string)
geturls_ua_opera_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0".format(windows7_ua_string)
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48".format(windows7_ua_string)
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ({0}; Trident/7.0; rv:11.0) like Gecko".format(windows7_ua_string)
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31".format(windows7_ua_string)
# User-Agent that identifies this program itself.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__)
# Name of the running Python implementation; fall back to plain "Python"
# when platform.python_implementation() reports an empty string.
py_implementation = platform.python_implementation() or "Python"
# More detailed self-identifying User-Agent with OS and interpreter info.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system() + " " + platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__)
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
# Default User-Agent used by the download helpers.
geturls_ua = geturls_ua_firefox_windows7
# Per-browser default header sets.  Each dict carries the matching
# User-Agent plus standard Accept*/Connection headers; Chromium-family
# entries also send Client Hints (Sec-CH-UA-*) and then merge in the
# Windows 7 platform hints defined earlier in this module.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"}
geturls_headers_chrome_windows7.update(windows7_ua_addon)
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"}
geturls_headers_chromium_windows7.update(windows7_ua_addon)
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"}
geturls_headers_opera_windows7.update(windows7_ua_addon)
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"}
geturls_headers_vivaldi_windows7.update(windows7_ua_addon)
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon)
# Headers for this program's own identity.  BUGFIX: the original dicts
# listed 'SEC-CH-UA-PLATFORM' twice (the second value str(__version__)
# silently overwrote the implementation name); the second occurrence is
# now 'SEC-CH-UA-PLATFORM-VERSION' to match the Client Hints spec.
# NOTE(review): PyBitness comes from platform.architecture() (a tuple)
# unless normalized earlier, so str(PyBitness) may render as
# "('64bit', '')" — confirm the upstream normalization branches.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"" + __project__ + "\";v=\"" + str(__version__) + "\", \"Not;A=Brand\";v=\"8\", \"" + py_implementation + "\";v=\"" + str(platform.release()) + "\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)}
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"" + __project__ + "\";v=\"" + str(__version__) + "\", \"Not;A=Brand\";v=\"8\", \"" + py_implementation + "\";v=\"" + str(platform.release()) + "\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)}
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
# Default header set used by the download helpers.
geturls_headers = geturls_headers_firefox_windows7
# Default delay in seconds between downloads; 0 disables sleeping.
geturls_download_sleep = 0
227 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
228 if(outtype
=="print" and dbgenable
):
231 elif(outtype
=="log" and dbgenable
):
232 logging
.info(dbgtxt
);
234 elif(outtype
=="warning" and dbgenable
):
235 logging
.warning(dbgtxt
);
237 elif(outtype
=="error" and dbgenable
):
238 logging
.error(dbgtxt
);
240 elif(outtype
=="critical" and dbgenable
):
241 logging
.critical(dbgtxt
);
243 elif(outtype
=="exception" and dbgenable
):
244 logging
.exception(dbgtxt
);
246 elif(outtype
=="logalt" and dbgenable
):
247 logging
.log(dgblevel
, dbgtxt
);
249 elif(outtype
=="debug" and dbgenable
):
250 logging
.debug(dbgtxt
);
258 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
259 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
264 def add_url_param(url
, **params
):
266 parts
= list(urlparse
.urlsplit(url
));
267 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
269 parts
[n
]=urlencode(d
);
270 return urlparse
.urlunsplit(parts
);
# Make the script's own directory and the current working directory
# searchable through PATH (consumed by which_exec).
os.environ["PATH"] = os.pathsep.join(
    [os.environ["PATH"],
     os.path.dirname(os.path.realpath(__file__)),
     os.getcwd()])
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None when no matching file exists on the PATH.
    BUGFIX: the PATH is now split on os.pathsep instead of a hard-coded
    ":" — the original broke on Windows, where PATH is ";"-separated,
    and the PATH-augmentation line just above already uses os.pathsep.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
278 def listize(varlist
):
286 newlistreg
.update({ilx
: varlist
[il
]});
287 newlistrev
.update({varlist
[il
]: ilx
});
290 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
293 def twolistize(varlist
):
303 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
304 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
305 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
306 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
309 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
310 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
311 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
314 def arglistize(proexec
, *varlist
):
318 newarglist
= [proexec
];
320 if varlist
[il
][0] is not None:
321 newarglist
.append(varlist
[il
][0]);
322 if varlist
[il
][1] is not None:
323 newarglist
.append(varlist
[il
][1]);
327 def fix_header_names(header_dict
):
328 if(sys
.version
[0]=="2"):
329 header_dict
= {k
.title(): v
for k
, v
in header_dict
.iteritems()};
330 if(sys
.version
[0]>="3"):
331 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Render an elapsed time in seconds as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / (60 * 60))
    minutes = int((sec_elapsed % (60 * 60)) / 60)
    seconds = sec_elapsed % 60.0
    # Minutes are zero-padded to 2 chars, seconds to 5 ("SS.ss").
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
342 # get_readable_size by Lipis
343 # http://stackoverflow.com/posts/14998888/revisions
344 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
346 if(unit
!="IEC" and unit
!="SI"):
349 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
350 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
353 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
354 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
359 if abs(bytes
) < unitsize
:
360 strformat
= "%3."+str(precision
)+"f%s";
361 pre_return_val
= (strformat
% (bytes
, unit
));
362 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
363 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
364 alt_return_val
= pre_return_val
.split();
365 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
368 strformat
= "%."+str(precision
)+"f%s";
369 pre_return_val
= (strformat
% (bytes
, "YiB"));
370 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
371 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
372 alt_return_val
= pre_return_val
.split();
373 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
376 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
378 usehashtypes
= usehashtypes
.lower();
379 getfilesize
= os
.path
.getsize(infile
);
380 return_val
= get_readable_size(getfilesize
, precision
, unit
);
382 hashtypelist
= usehashtypes
.split(",");
383 openfile
= open(infile
, "rb");
384 filecontents
= openfile
.read();
387 listnumend
= len(hashtypelist
);
388 while(listnumcount
< listnumend
):
389 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
390 hashtypelistup
= hashtypelistlow
.upper();
391 filehash
= hashlib
.new(hashtypelistup
);
392 filehash
.update(filecontents
);
393 filegethash
= filehash
.hexdigest();
394 return_val
.update({hashtypelistup
: filegethash
});
398 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
400 usehashtypes
= usehashtypes
.lower();
401 getfilesize
= len(instring
);
402 return_val
= get_readable_size(getfilesize
, precision
, unit
);
404 hashtypelist
= usehashtypes
.split(",");
406 listnumend
= len(hashtypelist
);
407 while(listnumcount
< listnumend
):
408 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
409 hashtypelistup
= hashtypelistlow
.upper();
410 filehash
= hashlib
.new(hashtypelistup
);
411 if(sys
.version
[0]=="2"):
412 filehash
.update(instring
);
413 if(sys
.version
[0]>="3"):
414 filehash
.update(instring
.encode('utf-8'));
415 filegethash
= filehash
.hexdigest();
416 return_val
.update({hashtypelistup
: filegethash
});
420 def http_status_to_reason(code
):
423 101: 'Switching Protocols',
428 203: 'Non-Authoritative Information',
430 205: 'Reset Content',
431 206: 'Partial Content',
433 208: 'Already Reported',
435 300: 'Multiple Choices',
436 301: 'Moved Permanently',
441 307: 'Temporary Redirect',
442 308: 'Permanent Redirect',
445 402: 'Payment Required',
448 405: 'Method Not Allowed',
449 406: 'Not Acceptable',
450 407: 'Proxy Authentication Required',
451 408: 'Request Timeout',
454 411: 'Length Required',
455 412: 'Precondition Failed',
456 413: 'Payload Too Large',
458 415: 'Unsupported Media Type',
459 416: 'Range Not Satisfiable',
460 417: 'Expectation Failed',
461 421: 'Misdirected Request',
462 422: 'Unprocessable Entity',
464 424: 'Failed Dependency',
465 426: 'Upgrade Required',
466 428: 'Precondition Required',
467 429: 'Too Many Requests',
468 431: 'Request Header Fields Too Large',
469 451: 'Unavailable For Legal Reasons',
470 500: 'Internal Server Error',
471 501: 'Not Implemented',
473 503: 'Service Unavailable',
474 504: 'Gateway Timeout',
475 505: 'HTTP Version Not Supported',
476 506: 'Variant Also Negotiates',
477 507: 'Insufficient Storage',
478 508: 'Loop Detected',
480 511: 'Network Authentication Required'
482 return reasons
.get(code
, 'Unknown Status Code');
484 def ftp_status_to_reason(code
):
486 110: 'Restart marker reply',
487 120: 'Service ready in nnn minutes',
488 125: 'Data connection already open; transfer starting',
489 150: 'File status okay; about to open data connection',
491 202: 'Command not implemented, superfluous at this site',
492 211: 'System status, or system help reply',
493 212: 'Directory status',
496 215: 'NAME system type',
497 220: 'Service ready for new user',
498 221: 'Service closing control connection',
499 225: 'Data connection open; no transfer in progress',
500 226: 'Closing data connection',
501 227: 'Entering Passive Mode',
502 230: 'User logged in, proceed',
503 250: 'Requested file action okay, completed',
504 257: '"PATHNAME" created',
505 331: 'User name okay, need password',
506 332: 'Need account for login',
507 350: 'Requested file action pending further information',
508 421: 'Service not available, closing control connection',
509 425: 'Can\'t open data connection',
510 426: 'Connection closed; transfer aborted',
511 450: 'Requested file action not taken',
512 451: 'Requested action aborted. Local error in processing',
513 452: 'Requested action not taken. Insufficient storage space in system',
514 500: 'Syntax error, command unrecognized',
515 501: 'Syntax error in parameters or arguments',
516 502: 'Command not implemented',
517 503: 'Bad sequence of commands',
518 504: 'Command not implemented for that parameter',
519 530: 'Not logged in',
520 532: 'Need account for storing files',
521 550: 'Requested action not taken. File unavailable',
522 551: 'Requested action aborted. Page type unknown',
523 552: 'Requested file action aborted. Exceeded storage allocation',
524 553: 'Requested action not taken. File name not allowed'
526 return reasons
.get(code
, 'Unknown Status Code');
528 def sftp_status_to_reason(code
):
532 2: 'SSH_FX_NO_SUCH_FILE',
533 3: 'SSH_FX_PERMISSION_DENIED',
535 5: 'SSH_FX_BAD_MESSAGE',
536 6: 'SSH_FX_NO_CONNECTION',
537 7: 'SSH_FX_CONNECTION_LOST',
538 8: 'SSH_FX_OP_UNSUPPORTED'
540 return reasons
.get(code
, 'Unknown Status Code');
542 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
543 if isinstance(headers
, dict):
545 if(sys
.version
[0]=="2"):
546 for headkey
, headvalue
in headers
.iteritems():
547 returnval
.append((headkey
, headvalue
));
548 if(sys
.version
[0]>="3"):
549 for headkey
, headvalue
in headers
.items():
550 returnval
.append((headkey
, headvalue
));
551 elif isinstance(headers
, list):
557 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
558 if isinstance(headers
, dict):
560 if(sys
.version
[0]=="2"):
561 for headkey
, headvalue
in headers
.iteritems():
562 returnval
.append(headkey
+": "+headvalue
);
563 if(sys
.version
[0]>="3"):
564 for headkey
, headvalue
in headers
.items():
565 returnval
.append(headkey
+": "+headvalue
);
566 elif isinstance(headers
, list):
572 def make_http_headers_from_pycurl_to_dict(headers
):
574 headers
= headers
.strip().split('\r\n');
575 for header
in headers
:
576 parts
= header
.split(': ', 1)
579 header_dict
[key
.title()] = value
;
582 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
583 if isinstance(headers
, list):
588 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
590 elif isinstance(headers
, dict):
596 def get_httplib_support(checkvalue
=None):
597 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
599 returnval
.append("ftp");
600 returnval
.append("httplib");
602 returnval
.append("httplib2");
603 returnval
.append("urllib");
605 returnval
.append("urllib3");
606 returnval
.append("request3");
607 returnval
.append("request");
609 returnval
.append("requests");
611 returnval
.append("aiohttp");
613 returnval
.append("httpx");
614 returnval
.append("httpx2");
616 returnval
.append("mechanize");
618 returnval
.append("pycurl");
619 if(hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
620 returnval
.append("pycurl2");
621 if(hasattr(pycurl
, "CURL_HTTP_VERSION_3_0")):
622 returnval
.append("pycurl3");
624 returnval
.append("sftp");
626 returnval
.append("pysftp");
627 if(not checkvalue
is None):
628 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
629 checkvalue
= "urllib";
630 if(checkvalue
=="httplib1"):
631 checkvalue
= "httplib";
632 if(checkvalue
in returnval
):
638 def check_httplib_support(checkvalue
="urllib"):
639 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
640 checkvalue
= "urllib";
641 if(checkvalue
=="httplib1"):
642 checkvalue
= "httplib";
643 returnval
= get_httplib_support(checkvalue
);
646 def get_httplib_support_list():
647 returnval
= get_httplib_support(None);
650 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
651 global geturls_download_sleep
, havezstd
, havebrotli
, haveaiohttp
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
653 sleep
= geturls_download_sleep
;
656 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
657 httplibuse
= "urllib";
658 if(httplibuse
=="httplib1"):
659 httplibuse
= "httplib";
660 if(not haverequests
and httplibuse
=="requests"):
661 httplibuse
= "urllib";
662 if(not haveaiohttp
and httplibuse
=="aiohttp"):
663 httplibuse
= "urllib";
664 if(not havehttpx
and httplibuse
=="httpx"):
665 httplibuse
= "urllib";
666 if(not havehttpx
and httplibuse
=="httpx2"):
667 httplibuse
= "urllib";
668 if(not havehttpcore
and httplibuse
=="httpcore"):
669 httplibuse
= "urllib";
670 if(not havehttpcore
and httplibuse
=="httpcore2"):
671 httplibuse
= "urllib";
672 if(not havemechanize
and httplibuse
=="mechanize"):
673 httplibuse
= "urllib";
674 if(not havepycurl
and httplibuse
=="pycurl"):
675 httplibuse
= "urllib";
676 if(not havepycurl
and httplibuse
=="pycurl2"):
677 httplibuse
= "urllib";
678 if(havepycurl
and httplibuse
=="pycurl2" and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
679 httplibuse
= "pycurl";
680 if(not havepycurl
and httplibuse
=="pycurl3"):
681 httplibuse
= "urllib";
682 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
683 httplibuse
= "pycurl2";
684 if(havepycurl
and httplibuse
=="pycurl3" and not hasattr(pycurl
, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
685 httplibuse
= "pycurl";
686 if(not havehttplib2
and httplibuse
=="httplib2"):
687 httplibuse
= "httplib";
688 if(not haveparamiko
and httplibuse
=="sftp"):
690 if(not havepysftp
and httplibuse
=="pysftp"):
692 if(httplibuse
=="urllib" or httplibuse
=="request"):
693 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
694 elif(httplibuse
=="request"):
695 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
696 elif(httplibuse
=="request3"):
697 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
698 elif(httplibuse
=="httplib"):
699 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
700 elif(httplibuse
=="httplib2"):
701 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
702 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
703 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
704 elif(httplibuse
=="requests"):
705 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
706 elif(httplibuse
=="aiohttp"):
707 returnval
= download_from_url_with_aiohttp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
708 elif(httplibuse
=="httpx"):
709 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
710 elif(httplibuse
=="httpx2"):
711 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
712 elif(httplibuse
=="httpcore"):
713 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
714 elif(httplibuse
=="httpcore2"):
715 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
716 elif(httplibuse
=="mechanize"):
717 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
718 elif(httplibuse
=="pycurl"):
719 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
720 elif(httplibuse
=="pycurl2"):
721 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
722 elif(httplibuse
=="pycurl3"):
723 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
724 elif(httplibuse
=="ftp"):
725 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
726 elif(httplibuse
=="sftp"):
727 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
728 elif(httplibuse
=="pysftp"):
729 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a temporary file via the backend named by httplibuse.

    Normalizes backend aliases, downgrades to "urllib"/"httplib" when the
    requested optional backend is not installed, then dispatches to the
    matching download_from_url_file_with_* helper.  Returns that helper's
    result dict, or False for sftp/pysftp when the library is missing or
    for an unrecognized backend name.
    NOTE: the mutable defaults (ranges list) are kept for signature
    compatibility with existing callers; they are not mutated here.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # Alias normalization: several names are synonyms for the stdlib backends.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Fall back to urllib when an optional third-party backend is missing.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    # pycurl2/pycurl3 require HTTP/2 / HTTP/3 support compiled into libcurl;
    # degrade gracefully to the best version actually available.
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    # SFTP backends have no HTTP fallback; refuse when the library is absent.
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    # BUGFIX: this previously tested haveparamiko; pysftp needs the pysftp
    # package itself (matches the check in download_from_url_to_file).
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        # Unreachable ("request" is normalized to "urllib" above); kept to
        # mirror the sibling dispatchers in this file.
        returnval = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="aiohttp"):
        returnval = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
    else:
        # Unknown backend name — reconstructed fallback; original tail elided.
        returnval = False;
    return returnval;
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile (or stdout when outfile is "-")
    via the backend named by httplibuse.

    Normalizes backend aliases, downgrades to "urllib"/"httplib" when the
    requested optional backend is missing, then dispatches to the matching
    download_from_url_to_file_with_* helper.  Returns that helper's result
    dict, or False for unavailable sftp/pysftp or an unknown backend.
    NOTE: mutable defaults (ranges/buffersize lists) kept for signature
    compatibility; they are not mutated here.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    # Alias normalization (same scheme as download_from_url_file).
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    # Degrade pycurl2/pycurl3 to the best HTTP version libcurl supports.
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    # SFTP backends cannot fall back to HTTP.
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request"):
        # Unreachable ("request" normalized above); kept to mirror siblings.
        returnval = download_from_url_to_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="aiohttp"):
        returnval = download_from_url_to_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        # BUGFIX: outfile/outpath were previously omitted from the next four
        # branches, calling the *_to_file_* helpers with the wrong arguments.
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout);
    else:
        # Unknown backend name — reconstructed fallback; original tail elided.
        returnval = False;
    return returnval;
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib/urllib2 and return the body in memory.

    Builds an opener with the given cookie jar and headers, performs a
    GET/POST, streams the response in buffersize chunks into a BytesIO
    while logging progress, transparently decompresses gzip/deflate/br/
    zstd/lzma/bzip2 bodies, and returns a result dict
    ('Type': "Content", 'Content', 'Contentsize', 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code', 'Reason', 'HTTPLib').
    Returns False on URLError/socket.timeout.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — .update on a string
            # raised AttributeError whenever the header was absent.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same string-.update() defect as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline userinfo in the URL becomes a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        geturls_request = Request(httpurl);
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(geturls_request);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata);
        else:
            geturls_text = geturls_opener.open(geturls_request);
    except HTTPError as geturls_text_error:
        # HTTP errors still carry a readable body; keep going with it.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    try:
        httpcodereason = geturls_text.reason;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode());
    try:
        httpversionout = geturls_text.version;
    except AttributeError:
        httpversionout = "1.1";
    httpmethodout = geturls_request.get_method();
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    httpheaderout = fix_header_names(httpheaderout);
    if(sys.version[0]=="2"):
        # Py2 mimetools.Message is not a real dict; copy it key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: previously caught zstandard.error here.
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUGFIX: previously caught zstandard.error; bz2 raises these.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib and spill the body to a named temp file.

    Delegates the network work to download_from_url_with_urllib, writes
    the returned content to a uniquely-suffixed NamedTemporaryFile
    (delete=False — the caller owns and must remove it), tries to stamp
    the file's mtime from the Last-Modified header, and returns a dict
    ('Type': "File", 'Filename', 'Filesize', timing and response fields).
    Returns False when the underlying download failed.
    NOTE: ranges is accepted for interface parity but unused by this backend.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix from url+buffersize+start time (sha1 hex digest).
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Best-effort: mirror the server's Last-Modified into the file mtime.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Py2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        # NOTE(review): utime runs before the write below, so the write/close
        # refreshes mtime again — preserved from the original ordering.
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # BUGFIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via urllib into outpath/outfile.

    When outfile != "-": downloads to a temp file, moves it into place,
    mirrors the Last-Modified header into the file mtime, and returns a
    'Type': "File" result dict.  When outfile == "-": downloads to a temp
    file, copies its bytes into memory, removes the temp file, and returns
    a 'Type': "Content" result dict.  Returns False on failure or when the
    output location is invalid (outpath is a file / filepath is a dir).
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Best-effort: mirror Last-Modified into the destination file mtime.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Py2 has no parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time was start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; the second entry
        # (httpmethod) won at runtime, so only it is kept here.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    else:
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            # Copy the temp file into memory in buffersize[1] chunks,
            # logging progress (reconstructed buffer; original lines elided).
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time was start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed (runtime-winning value kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httplib/http.client and return the body in memory.

    Opens an HTTP(S)Connection for the URL's host, issues a GET/POST for
    the URL path, streams the response in buffersize chunks into a BytesIO
    while logging progress, decompresses gzip/deflate/br/zstd/lzma/bzip2
    bodies, and returns the same result-dict shape as the urllib backend
    (with 'HTTPLib': "httplib").  Returns False on connection errors or a
    non-http(s) scheme.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) on a string.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same string-.update() defect as above.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline userinfo in the URL becomes a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: the POST branch previously sent verb "GET".
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except BlockingIOError:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    # BUGFIX: version is an int (10 or 11); the old =="10" string compare
    # never matched, so HTTP/1.0 responses were reported as "1.1".
    if(geturls_text.version==10):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = geturls_text._method;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2 header object is not a real dict; copy it key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content);
        except lzma.LZMAError:
            # BUGFIX: previously caught zstandard.error here.
            pass;
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content);
        except (OSError, ValueError):
            # BUGFIX: previously caught zstandard.error; bz2 raises these.
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib"};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the httplib backend and spool the body to a
    uniquely named temporary file.

    Returns the backend's result dict rewritten as a 'File' record
    (Filename, Filesize, Headers, ...), or False if the download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    # BUG FIX: this wrapper previously delegated to
    # download_from_url_with_urllib, so the httplib backend was never used.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Preserve the server's Last-Modified stamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; parse the header by hand.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # NOTE(review): start - end yields a negative duration; kept for
    # consistency with the rest of the file -- confirm hms_string handles it.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httplib backend to outpath/outfile.

    With outfile == "-" the content is returned in-memory as a 'Content'
    record instead of being written to disk.  Returns False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile=="-"):
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified time over to the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the dict previously listed 'Method' twice
        # (pretmpfilename.get('Method') then httpmethod); the later key
        # silently won, so only httpmethod is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        # Stream the temp file back into memory and return it as content.
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                # BUG FIX: guard the percentage computation so an empty file
                # (downloadsize == 0) cannot divide by zero.
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httplib2's timeout-aware connection classes and
    return a 'Content' result dict (body, headers, status, ...), or False
    on connection failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: previously called .update() on the httpuseragent
            # string itself instead of on the header dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same mistargeted .update() as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout)
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod=="POST"):
            # BUG FIX: POST requests were previously issued with verb "GET",
            # so the body was sent but the method was wrong.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: HTTPResponse.version is the int 10 or 11; comparing against
    # the string "10" was always false, reporting every response as 1.1.
    if(geturls_text.version==10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 httplib may hand back a message object; copy it into a
        # plain dict key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently undo any Content-Encoding the server applied.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error", which NameErrors when
        # zstandard is absent; lzma raises LZMAError.
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httplib2"}
    geturls_text.close()
    return returnval
if(not havehttplib2):
    # httplib2 is not installed: expose the same entry point but defer to
    # the plain urllib implementation so callers need not care.
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback for download_from_url_with_httplib2 using urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl through the httplib2 backend and write the body to a
    uniquely named temporary file, returning a 'File' result dict or False."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Build a one-off temp-file suffix from a SHA-1 of url/buffer/start time.
    digest = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        digest.update(httpurl)
        digest.update(str(buffersize))
        digest.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        digest.update(httpurl.encode('utf-8'))
        digest.update(str(buffersize).encode('utf-8'))
        digest.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(digest.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        lastmod = pretmpfilename.get('Headers').get('Last-Modified')
        try:
            # Stamp the temp file with the server-reported modification time.
            mtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (mtime, mtime))
        except AttributeError:
            # No parsedate_to_datetime on Python 2; fall back to strptime.
            try:
                mtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (mtime, mtime))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttplib2):
    # Without httplib2, the file-download variant simply forwards every
    # argument to the urllib implementation.
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback for download_from_url_file_with_httplib2 using urllib."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httplib2 backend to outpath/outfile.

    With outfile == "-" the content is returned in-memory as a 'Content'
    record instead of being written to disk.  Returns False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile=="-"):
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified time over to the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the dict previously listed 'Method' twice
        # (pretmpfilename.get('Method') then httpmethod); only the second
        # could ever take effect, so a single 'Method': httpmethod remains.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        # Stream the temp file back into memory and return it as content.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                # BUG FIX: guard against division by zero for empty files.
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttplib2):
    # httplib2 missing: delegate the to-file variant to urllib.
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback for download_from_url_to_file_with_httplib2 using urllib.

        BUG FIX: the delegate was previously called with positional
        arguments in the wrong order -- buffersize landed in the outfile
        slot and ranges was dropped entirely.  Keyword arguments ensure
        each value reaches the matching parameter.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: 'request' downloads are served by the urllib backend."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: file downloads for 'request' go through urllib."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend: to-file downloads for 'request' go through urllib.

    BUG FIX: the delegate was previously called with positional arguments
    in the wrong order (buffersize occupied the outfile slot, ranges was
    never forwarded).  Keyword arguments route every value correctly.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the requests package (streaming) and return a
    'Content' result dict (body, headers, status, ...), or False on
    connection failure."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: previously called .update() on the httpuseragent
            # string itself instead of on the header dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same mistargeted .update() as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        reqsession = requests.Session()
        # BUG FIX: the timeout parameter was accepted but never forwarded
        # to requests, so stalled connections hung indefinitely.
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
        else:
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True, timeout=timeout)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUG FIX: requests.exceptions has no "ConnectError"; the class is
    # ConnectionError, so the old handler itself raised AttributeError.
    except requests.exceptions.ConnectionError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    # BUG FIX: raw.version is the int 10 or 11; comparing with the string
    # "10" was always false, so every response was reported as HTTP/1.1.
    if(geturls_text.raw.version==10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: copy the header mapping into a plain dict key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.raw.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently undo any Content-Encoding the server applied.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; lzma raises LZMAError.
        except lzma.LZMAError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        # BUG FIX: was "except zstandard.error"; bz2 raises OSError/ValueError.
        except (OSError, ValueError):
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "requests"}
    geturls_text.close()
    return returnval
if(not haverequests):
    # The requests package is absent: keep the public name but satisfy it
    # with the urllib implementation.
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback for download_from_url_with_requests using urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the requests backend into a uniquely named
    temporary file and return a 'File' result dict describing it
    (Filename, Filesize, Headers, Code, ...). Returns False when the
    underlying download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified onto the temp file; prefer the
            # RFC 2822 parser, fall back to strptime for odd header formats.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, a non-positive
    # value fed to the log message and DownloadTime).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stand-in used when the requests module is unavailable.

        Delegates to the urllib file-download implementation with
        identical arguments and returns its result unchanged.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the requests backend.

    If outfile != "-", move the downloaded temp file to outpath/outfile
    and return a 'File' result dict; if outfile == "-", read the temp
    file back into memory and return a 'Content' result dict.
    Returns False on failure (bad paths or failed download).
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when the directory path is a file, or the target
        # path is an existing directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict literal listed 'Method' twice; the later
        # httpmethod entry won, so keep that single entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Stand-in used when the requests module is unavailable;
        delegates to the urllib to-file implementation.

        BUGFIX: the original forwarded positionally as
        (..., postdata, buffersize, outfile, outpath, sleep, timeout),
        which lands buffersize in the delegate's outfile slot (the
        to-file signature family is ..., postdata, outfile, outpath,
        ranges, buffersize, sleep, timeout). Keyword arguments make the
        mapping explicit and drop-proof.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with aiohttp's ClientSession and return a 'Content'
    result dict (decoded body, headers, status). Returns False on
    connection errors or timeouts.

    NOTE(review): aiohttp's session/response API is coroutine-based; this
    synchronous call pattern mirrors the original code — confirm it is
    actually exercised, or drive it from an event loop.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — updated the wrong
            # object (a str), never adding the header actually sent.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same wrong-object bug — the Referer belongs in httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # URL-embedded credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize)
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata)
        else:
            geturls_text = reqsession.get(httpurl)
    except (aiohttp.ClientError, socket.timeout):
        # BUGFIX: the original caught aiohttp.exceptions.ConnectTimeout /
        # ConnectError, but aiohttp exposes no "exceptions" attribute, so
        # those except clauses themselves raised AttributeError.
        # aiohttp.ClientError is the base of aiohttp's client errors.
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    httpversionout = geturls_text.version
    httpmethodout = geturls_text.method
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request_info.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: copy the header mapping into a plain dict, key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the accumulated body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparent decoding of common Content-Encoding values; failures
    # leave the raw body untouched.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" — wrong module for lzma.
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error" — bz2.decompress raises
            # OSError/ValueError on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "aiohttp"}
    geturls_text.close()
    return returnval
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stand-in used when aiohttp is unavailable.

        Delegates to the urllib implementation with identical arguments
        and returns its result unchanged.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the aiohttp backend into a uniquely named
    temporary file and return a 'File' result dict describing it.
    Returns False when the underlying download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified onto the temp file; prefer the
            # RFC 2822 parser, fall back to strptime for odd header formats.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, a non-positive
    # value fed to the log message and DownloadTime).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stand-in used when aiohttp is unavailable.

        Delegates to the urllib file-download implementation with
        identical arguments and returns its result unchanged.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the aiohttp backend.

    If outfile != "-", move the downloaded temp file to outpath/outfile
    and return a 'File' result dict; if outfile == "-", read the temp
    file back into memory and return a 'Content' result dict.
    Returns False on failure (bad paths or failed download).
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when the directory path is a file, or the target
        # path is an existing directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict literal listed 'Method' twice; the later
        # httpmethod entry won, so keep that single entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Stand-in used when aiohttp is unavailable; delegates to the
        urllib to-file implementation.

        BUGFIX: the original forwarded positionally as
        (..., postdata, buffersize, outfile, outpath, sleep, timeout),
        which lands buffersize in the delegate's outfile slot (the
        to-file signature family is ..., postdata, outfile, outpath,
        ranges, buffersize, sleep, timeout). Keyword arguments make the
        mapping explicit and drop-proof.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpx.Client (HTTP/1.1 only) and return a
    'Content' result dict (decoded body, headers, status). Returns False
    on connection errors or timeouts.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — updated the wrong
            # object (a str), never adding the header actually sent.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same wrong-object bug — the Referer belongs in httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # URL-embedded credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        # Older httpx releases lack reason_phrase; derive it from the code.
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: copy the header mapping into a plain dict, key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # BUGFIX: httpx responses expose iter_bytes(), not the requests-style
        # iter_content() the original called (AttributeError at runtime).
        for databytes in geturls_text.iter_bytes(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the accumulated body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparent decoding of common Content-Encoding values; failures
    # leave the raw body untouched.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.ZstdError:
            pass
    elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUGFIX: was "except zstandard.error" — wrong module for lzma.
            pass
    elif(httpheaderout.get("Content-Encoding")=="bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (OSError, ValueError):
            # BUGFIX: was "except zstandard.error" — bz2.decompress raises
            # OSError/ValueError on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx"}
    geturls_text.close()
    return returnval
if(not havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stand-in used when httpx is unavailable; delegates to the
        urllib implementation.

        BUGFIX(review): this fallback definition appeared unguarded in
        the extracted text and would unconditionally override the real
        httpx implementation; every sibling fallback is wrapped in an
        "if(not have...):" check, so the guard is restored here — confirm
        against the pristine file.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpx backend into a uniquely named
    temporary file and return a 'File' result dict describing it.
    Returns False when the underlying download fails.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified onto the temp file; prefer the
            # RFC 2822 parser, fall back to strptime for odd header formats.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, a non-positive
    # value fed to the log message and DownloadTime).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttpx):
    def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stand-in used when httpx is unavailable; delegates to the
        urllib file-download implementation.

        BUGFIX(review): this fallback definition appeared unguarded in
        the extracted text and would unconditionally override the real
        httpx implementation; every sibling fallback is wrapped in an
        "if(not have...):" check, so the guard is restored here — confirm
        against the pristine file.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx backend and store the result.

    When outfile is "-" the payload is returned in memory as a
    'Type': "Content" dict; otherwise the downloaded temp file is moved to
    outpath/outfile and a 'Type': "File" dict is returned.  Returns False
    on download failure or when the destination paths are unusable.
    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath exists but is a regular file: cannot place a file in it
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # destination name is taken by a directory: refuse to overwrite
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server-side Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; parse the RFC 1123 date.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # NOTE(review): elapsed time is computed start - end (negative) all
        # through this file; kept unchanged for consistency with hms_string.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the original dict literal listed 'Method' twice
        # ('Method': pretmpfilename.get('Method'), 'Method': httpmethod);
        # the duplicate is removed keeping the last-one-wins value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            # ROBUSTNESS FIX: the "-" branch did not check for a failed
            # download before touching the temp file.
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well (last wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib backend.

    BUG FIX: the original forwarded buffersize positionally ahead of
    outfile/outpath (and dropped ranges), which mismatches the parameter
    order of this function and its siblings; everything after httpmethod is
    now passed by keyword.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpx (HTTP/1.1 + HTTP/2) and return a result dict.

    Returns a dict ('Type': "Content") holding the (decompressed) body under
    'Content' plus status/header metadata, or False on connection errors.
    Non-dict postdata is urlencoded before sending.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: originally called httpuseragent.update(...) -- updating
            # the user-agent string itself instead of the header dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-object update; the referer belongs in httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Turn URL-embedded credentials into an HTTP Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)  # honor the configured inter-request delay
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
        elif(httpmethod == "POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    try:
        httpcodereason = geturls_text.reason_phrase
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code)
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Copy header-object entries into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # NOTE(review): httpx Response exposes iter_bytes(); iter_content()
        # is a requests-ism -- confirm against the httpx version in use.
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decompress known Content-Encodings; keep raw bytes on failure.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUG FIX: originally caught zstandard.error (copy-paste error).
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (IOError, ValueError):
            # BUG FIX: originally caught zstandard.error; bz2 raises
            # IOError/OSError or ValueError on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpx2"}
    geturls_text.close()
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib backend.

    Forwards trailing arguments by keyword and returns the delegated result.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpx2 backend into a uniquely named temp file.

    Returns the backend's metadata dict re-keyed as 'Type': "File" with the
    temp file's name and on-disk size, or False when the download fails.
    Consistency: defaults (geturls_headers / geturls_cj / None) added to
    match the sibling download_from_url_file_with_* helpers; callers that
    pass every argument are unaffected.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a per-download unique temp-file suffix from url+buffersize+time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the temp file with the server's Last-Modified time when present.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; parse the RFC 1123 date.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        # NOTE(review): the write happens after os.utime, so the mtime set
        # above is clobbered by the write itself -- kept as in the original.
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    # Refresh size fields now that the payload is on disk.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib backend.

    Forwards trailing arguments by keyword and returns the delegated result.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx2 backend and store the result.

    When outfile is "-" the payload is returned in memory as a
    'Type': "Content" dict; otherwise the downloaded temp file is moved to
    outpath/outfile and a 'Type': "File" dict is returned.  Returns False
    on download failure or when the destination paths are unusable.
    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath exists but is a regular file: cannot place a file in it
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # destination name is taken by a directory: refuse to overwrite
            return False
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server-side Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; parse the RFC 1123 date.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: duplicate 'Method' key removed from the dict literal
        # (last-one-wins value httpmethod kept).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            # ROBUSTNESS FIX: check for a failed download in the "-" branch.
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well (last wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib backend.

    BUG FIX: the original forwarded buffersize positionally ahead of
    outfile/outpath (and dropped ranges); everything after httpmethod is now
    passed by keyword so the urllib backend receives the intended values.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httpcore (HTTP/1.1 only) and return a result dict.

    Returns a dict ('Type': "Content") holding the (decompressed) body under
    'Content' plus status/header metadata, or False on connection errors.
    httpcookie is accepted for signature parity but httpcore has no cookie
    jar support, so it is not sent.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: originally updated httpuseragent (a string) instead of
            # the header dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same wrong-object update; the referer belongs in httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Turn URL-embedded credentials into an HTTP Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)  # honor the configured inter-request delay
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            # BUG FIX: the POST branch originally issued request("GET", ...)
            # with data=; httpcore takes the body via the content parameter.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Copy header-object entries into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        # NOTE(review): httpcore responses stream via read()/iter_stream();
        # iter_content() looks copied from the requests API -- confirm
        # against the httpcore version in use.
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently decompress known Content-Encodings; keep raw bytes on failure.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:
            # BUG FIX: originally caught zstandard.error (copy-paste error).
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except (IOError, ValueError):
            # BUG FIX: originally caught zstandard.error; bz2 raises
            # IOError/OSError or ValueError on bad data.
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore"}
    geturls_text.close()
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to urllib.

        Forwards trailing arguments by keyword and returns the result.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpcore backend into a uniquely named temp file.

    Returns the backend's metadata dict re-keyed as 'Type': "File" with the
    temp file's name and on-disk size, or False when the download fails.
    Consistency: defaults (geturls_headers / geturls_cj / None) added to
    match the sibling download_from_url_file_with_* helpers; callers that
    pass every argument are unaffected.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a per-download unique temp-file suffix from url+buffersize+time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the temp file with the server's Last-Modified time when present.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; parse the RFC 1123 date.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        # NOTE(review): the write happens after os.utime, so the mtime set
        # above is clobbered by the write itself -- kept as in the original.
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    # Refresh size fields now that the payload is on disk.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to urllib.

        Forwards trailing arguments by keyword and returns the result.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata=postdata, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpcore backend and store the result.

    When outfile is "-" the payload is returned in memory as a
    'Type': "Content" dict; otherwise the downloaded temp file is moved to
    outpath/outfile and a 'Type': "File" dict is returned.  Returns False
    on download failure or when the destination paths are unusable.
    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath exists but is a regular file: cannot place a file in it
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # destination name is taken by a directory: refuse to overwrite
            return False
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server-side Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; parse the RFC 1123 date.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: duplicate 'Method' key removed from the dict literal
        # (last-one-wins value httpmethod kept).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            # ROBUSTNESS FIX: check for a failed download in the "-" branch.
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well (last wins).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the urllib
        implementation with the same arguments.

        NOTE(review): 'ranges' is accepted for signature compatibility but not
        forwarded, matching the sibling fallbacks — confirm against callers.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpcore connection pool and return a result dict
    ('Type', 'Content', 'Contentsize', 'Headers', 'Code', ...), or False on a
    connection error.

    Fixes: the POST branch now issues a real POST (it previously sent a GET);
    the User-Agent/Referer fallback branches now update httpheaders (they
    previously called .update() on the httpuseragent string); lzma/bzip2
    decompression catches matching exception types instead of zstandard.error;
    the spool buffer is rewound before reading and the result is returned.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # fix: was httpuseragent.update({...}), which fails on a str
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # fix: was httpuseragent.update({...})
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # fix: was request("GET", ...) in the POST branch
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header containers may not be plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # fix: rewind before read, otherwise content is empty
        returnval_content = strbuf.read()
    geturls_text.close()
    # Transparently undo any transfer Content-Encoding; on decode failure the
    # raw bytes are kept (best-effort, matching the sibling implementations).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # fix: was zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except OSError:  # fix: was zstandard.error; bz2 raises OSError
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "httpcore2"}
    geturls_text.close()
    return returnval  # fix: result was never returned
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the urllib
        implementation with the same arguments."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpcore2 and spool the body
    into a uniquely named temporary file. Returns a result dict describing the
    file ('Type': "File", 'Filename', 'Filesize', ...), or False when the
    underlying download failed.

    NOTE(review): 'ranges' is accepted but not forwarded, matching the sibling
    *_file_* helpers — confirm against callers before changing.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Preserve the server's Last-Modified stamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Older Pythons lack parsedate_to_datetime; parse by hand.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # NOTE(review): start-end is negative; hms_string presumably normalizes —
    # kept as-is to match every sibling implementation in this file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval  # fix: result was never returned
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the urllib
        implementation with the same arguments."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile, or — when outfile == "-" — return
    the body as in-memory content. Returns a result dict, or False on failure.

    buffersize is a two-element list: [0] is the network buffer forwarded to
    the file downloader, [1] is the local copy buffer.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified stamp on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # fix: the dict previously listed 'Method' twice; the later value
        # (httpmethod) won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile == "-"):
        # Spool to a temp file, then copy its bytes into memory.
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # fix: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval  # fix: result was never returned
if(not havehttpcore):
    # NOTE(review): guard reconstructed from the sibling fallback pattern —
    # this re-definition only makes sense when httpcore is unavailable.
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the urllib
        implementation ('ranges' is accepted but not forwarded, matching the
        sibling fallbacks)."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias for the urllib3-backed downloader, kept for API compatibility."""
    returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval  # fix: computed result was never returned
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation with the same arguments."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias for the urllib3-backed file downloader, kept for API
    compatibility."""
    returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval  # fix: computed result was never returned
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation with the same arguments."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias for the urllib3-backed to-file downloader, kept for API
    compatibility ('ranges' is accepted but not forwarded, matching the
    sibling aliases)."""
    returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval  # fix: computed result was never returned
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation ('ranges' is accepted but not forwarded, matching the
        sibling fallbacks)."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with a urllib3 PoolManager and return a result dict
    ('Type', 'Content', 'Contentsize', 'Headers', 'Code', ...), or False on a
    connection error.

    Fixes: the User-Agent/Referer fallback branches now update httpheaders
    (they previously called .update() on the httpuseragent string); lzma/bzip2
    decompression catches matching exception types instead of zstandard.error;
    the spool buffer is rewound before reading and the result is returned.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # fix: was httpuseragent.update({...}), which fails on a str
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # fix: was httpuseragent.update({...})
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    timeout = urllib3.util.Timeout(connect=timeout, read=timeout)
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.HTTPError:
        # NOTE(review): reconstructed — the original exception type for this
        # final handler is not visible in the mangled source; the urllib3 base
        # class is used so no previously-handled case becomes fatal.
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header containers may not be plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # fix: rewind before read, otherwise content is empty
        returnval_content = strbuf.read()
    # Transparently undo any transfer Content-Encoding; on decode failure the
    # raw bytes are kept (best-effort, matching the sibling implementations).
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "lzma" and havelzma):
        try:
            returnval_content = lzma.decompress(returnval_content)
        except lzma.LZMAError:  # fix: was zstandard.error
            pass
    elif(httpheaderout.get("Content-Encoding") == "bzip2"):
        try:
            returnval_content = bz2.decompress(returnval_content)
        except OSError:  # fix: was zstandard.error; bz2 raises OSError
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "urllib3"}
    geturls_text.close()
    return returnval  # fix: result was never returned
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation with the same arguments."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval  # fix: computed result was never returned
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_urllib3 and spool the body
    into a uniquely named temporary file. Returns a result dict describing the
    file ('Type': "File", 'Filename', 'Filesize', ...), or False when the
    underlying download failed.

    NOTE(review): 'ranges' is accepted but not forwarded, matching the sibling
    *_file_* helpers — confirm against callers before changing.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Preserve the server's Last-Modified stamp on the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Older Pythons lack parsedate_to_datetime; parse by hand.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    exec_time_end = time.time()
    # NOTE(review): start-end is negative; hms_string presumably normalizes —
    # kept as-is to match every sibling implementation in this file.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval  # fix: result was never returned
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the urllib
        implementation with the same arguments."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval  # fix: computed result was never returned
3419 def download_from_url_to_file_with_urllib3(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
3420 global geturls_download_sleep
, havezstd
, havebrotli
;
3422 sleep
= geturls_download_sleep
;
3425 if(not outfile
=="-"):
3426 outpath
= outpath
.rstrip(os
.path
.sep
);
3427 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
3428 if(not os
.path
.exists(outpath
)):
3429 os
.makedirs(outpath
);
3430 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
3432 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
3434 pretmpfilename
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3435 if(not pretmpfilename
):
3437 tmpfilename
= pretmpfilename
.get('Filename');
3438 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3440 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
3441 exec_time_start
= time
.time();
3442 shutil
.move(tmpfilename
, filepath
);
3444 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
3445 except AttributeError:
3447 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3452 exec_time_end
= time
.time();
3453 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
3454 if(os
.path
.exists(tmpfilename
)):
3455 os
.remove(tmpfilename
);
3456 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
3458 pretmpfilename
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, ranges
, buffersize
[0], sleep
, timeout
);
3459 tmpfilename
= pretmpfilename
.get('Filename');
3460 downloadsize
= int(os
.path
.getsize(tmpfilename
));
3463 exec_time_start
= time
.time();
3464 with
open(tmpfilename
, 'rb') as ft
:
3467 databytes
= ft
.read(buffersize
[1]);
3468 if not databytes
: break;
3469 datasize
= len(databytes
);
3470 fulldatasize
= datasize
+ fulldatasize
;
3473 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3474 downloaddiff
= fulldatasize
- prevdownsize
;
3475 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3476 prevdownsize
= fulldatasize
;
3479 fdata
= f
.getvalue();
3482 os
.remove(tmpfilename
);
3483 exec_time_end
= time
.time();
3484 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
3485 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename
.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
.get('Headers'), 'Version': pretmpfilename
.get('Version'), 'Method': pretmpfilename
.get('Method'), 'Method': httpmethod
, 'HeadersSent': pretmpfilename
.get('HeadersSent'), 'URL': pretmpfilename
.get('URL'), 'Code': pretmpfilename
.get('Code'), 'Reason': pretmpfilename
.get('Reason'), 'HTTPLib': pretmpfilename
.get('HTTPLib')};
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate the download to the
        plain-urllib backend while keeping the urllib3 entry point's signature.

        Returns whatever download_from_url_to_file_with_urllib returns (a result
        dict, or False on failure).

        Fixes vs the recovered code: the arguments after postdata were forwarded
        positionally in the wrong order (buffersize landed in the outfile slot and
        ranges was dropped -- judged against the identical signatures of the other
        visible *_to_file_* functions); the delegated result was also never
        returned.  Keyword arguments make the forwarding order-proof.
        NOTE(review): assumes download_from_url_to_file_with_urllib accepts the
        same keyword names as its siblings -- confirm against the full file.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch *httpurl* into memory with mechanize.Browser and return a result dict:
    {'Type': "Content", 'Content', 'Contentsize', 'ContentsizeAlt', 'Headers',
     'Version', 'Method', 'HeadersSent', 'URL', 'Code', 'Reason', 'HTTPLib'}.

    NOTE(review): recovered from a damaged extraction -- structural lines
    (try:/else:/while True:/for ...:/return ...) were lost, so dropped scaffolding
    is marked with comments instead of being guessed back in.  Code tokens are
    unchanged from the visible source.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # (dropped guard, presumably "if(sleep<0):") -- fall back to the module delay
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalize header input: list-of-pairs -> dict, then canonical name casing.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # (dropped "else:") BUG(review): calls .update on httpuseragent (a str);
        # presumably httpheaders.update was intended -- confirm against full file.
        httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # (dropped "else:") BUG(review): same -- presumably httpheaders.update.
        httpuseragent.update({'Referer': httpreferer});
    # Inline userinfo (user:pass@host) becomes a Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = mechanize.Browser();
    # mechanize wants addheaders as a list of (name, value) pairs.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    geturls_opener.set_cookiejar(httpcookie);
    geturls_opener.set_handle_robots(False);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    # (dropped "try:" around the request dispatch below)
    if(httpmethod=="GET"):
        geturls_text = geturls_opener.open(httpurl);
    elif(httpmethod=="POST"):
        geturls_text = geturls_opener.open(httpurl, data=postdata);
    # (dropped "else:") unknown methods fall back to a plain GET
    geturls_text = geturls_opener.open(httpurl);
    # HTTP errors still carry a readable response object, so keep it as the result.
    except mechanize.HTTPError as geturls_text_error:
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    # (dropped except header for this error path, presumably URLError)
    log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        # (dropped body, presumably "return False;")
    httpcodeout = geturls_text.code;
    httpcodereason = geturls_text.msg;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    reqhead = geturls_opener.request;
    httpheadersentout = reqhead.header_items();
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    # Py2: header object -> plain dict rebuild.
    if(sys.version[0]=="2"):
        # (dropped "try:")
        prehttpheaderout = httpheaderout;
        httpheaderkeys = httpheaderout.keys();
        imax = len(httpheaderkeys);
        # (dropped loop header, presumably "for ic in range(imax):")
        httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
    except AttributeError:
        # (dropped handler body)
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    # Content-Length drives the progress percentage; 0 when absent.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # (dropped initializers, presumably "fulldatasize = 0;" / "prevdownsize = 0;")
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # (dropped "while True:" loop header around the chunked read below)
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # (dropped "strbuf.seek(0);" presumably, before reading back)
        returnval_content = strbuf.read();
        # Transparently decode the body per Content-Encoding; each decompress sat
        # inside a dropped "try:".
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            returnval_content = zlib.decompress(returnval_content);
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
        elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
            returnval_content = lzma.decompress(returnval_content);
        # BUG(review): lzma failures raise lzma.LZMAError, not zstandard.error.
        except zstandard.error:
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            returnval_content = bz2.decompress(returnval_content);
        # BUG(review): bz2 failures raise OSError/ValueError, not zstandard.error.
        except zstandard.error:
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "mechanize"};
    geturls_text.close();
    # (dropped "return returnval;")
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate the in-memory
        download to the urllib backend, preserving the mechanize entry point's
        signature.

        Returns whatever download_from_url_with_urllib returns (a result dict,
        or False on failure).

        Fix vs the recovered code: returnval was computed but never returned,
        so the fallback always yielded None.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download via download_from_url_with_mechanize into a NamedTemporaryFile and
    return a 'Type': "File" result dict describing the temp file.

    NOTE(review): recovered from a damaged extraction -- dropped scaffolding lines
    are marked in comments; code tokens are unchanged.  The *ranges* parameter is
    accepted but never forwarded in the visible call below -- confirm against the
    undamaged file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # (dropped guard, presumably "if(sleep<0):")
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # (dropped body, presumably "return False;")
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified time onto the temp file ("try:" dropped).
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Manual RFC-1123 parse when parsedate_to_datetime is unavailable
            # ("try:" dropped).
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # NOTE(review): start - end is a negative duration; presumably end - start
    # was intended -- confirm before changing.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # (dropped "return returnval;")
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate the download-to-
        temp-file operation to the urllib backend with the same signature.

        Returns whatever download_from_url_file_with_urllib returns (a result
        dict, or False on failure).

        Fix vs the recovered code: returnval was computed but never returned.
        The positional forwarding order matches the visible file-with signatures
        (postdata, ranges, buffersize, sleep, timeout).
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download to *outpath*/*outfile* (or into memory when outfile=="-") using the
    mechanize backend; returns a 'File' or 'Content' result dict.

    NOTE(review): recovered from a damaged extraction -- dropped scaffolding lines
    (try:/else:/while True:/return) are marked in comments; code tokens unchanged.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # (dropped guard, presumably "if(sleep<0):")
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to write when outpath is a regular file or filepath is a dir.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # (dropped body, presumably "return False;")
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # (dropped body, presumably "return False;")
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # (dropped body, presumably "return False;")
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified on the moved file ("try:" dropped).
        os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Manual RFC-1123 parse fallback ("try:" dropped).
            os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        exec_time_end = time.time();
        # NOTE(review): start - end is negative; presumably end - start intended.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG(review): duplicate 'Method' key below -- the second entry
        # ('Method': httpmethod) silently wins.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # (dropped branch header, presumably 'if(outfile=="-"):' for the in-memory path)
    pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
    tmpfilename = pretmpfilename.get('Filename');
    downloadsize = int(os.path.getsize(tmpfilename));
    # (dropped initializers, presumably "fulldatasize = 0;" / "prevdownsize = 0;")
    exec_time_start = time.time();
    with open(tmpfilename, 'rb') as ft:
        # (dropped lines, presumably "f = BytesIO();" and a "while True:" header)
        databytes = ft.read(buffersize[1]);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        # NOTE(review): f is not defined in the visible lines -- presumably a
        # BytesIO created in a dropped line; confirm against the undamaged file.
        fdata = f.getvalue();
    os.remove(tmpfilename);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
    # BUG(review): 'HeadersSent' below is the literal list ['HeadersSent'] --
    # presumably meant pretmpfilename.get('HeadersSent') (compare the sibling
    # dicts in this file); also duplicate 'Method' key as above.
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': ['HeadersSent'], 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # (dropped "return returnval;")
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the urllib
        backend while keeping the mechanize entry point's signature.

        Returns whatever download_from_url_to_file_with_urllib returns (a result
        dict, or False on failure).

        Fixes vs the recovered code: the arguments after postdata were forwarded
        positionally in the wrong order (buffersize landed in the outfile slot and
        ranges was dropped -- judged against the identical signatures of the
        visible *_to_file_* functions); the delegated result was never returned.
        NOTE(review): assumes download_from_url_to_file_with_urllib accepts these
        keyword names -- confirm against the full file.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch *httpurl* into memory with pycurl and return a result dict
    ('Type': "Content", ..., 'HTTPLib': "pycurl").

    NOTE(review): recovered from a damaged extraction -- structural lines
    (try:/else:/while True:/for ...:/return ...) were lost; dropped scaffolding is
    marked in comments and code tokens are unchanged from the visible source.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # (dropped guard, presumably "if(sleep<0):")
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalize header input: list-of-pairs -> dict, then canonical name casing.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # (dropped "else:") BUG(review): calls .update on httpuseragent (a str);
        # presumably httpheaders.update was intended -- confirm against full file.
        httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # (dropped "else:") BUG(review): same -- presumably httpheaders.update.
        httpuseragent.update({'Referer': httpreferer});
    # URL userinfo -> Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    # NOTE(review): a urllib opener is built but pycurl performs the request;
    # only its addheaders assignment is visible below.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    # pycurl wants headers as a list of "Name: value" strings.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
    geturls_opener.addheaders = httpheaders;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    retrieved_body = BytesIO();
    retrieved_headers = BytesIO();
    # (dropped "try:" around the request dispatch below)
    if(httpmethod=="GET"):
        geturls_text = pycurl.Curl();
        geturls_text.setopt(geturls_text.URL, httpurl);
        geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
        geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
        geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
        geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
        geturls_text.setopt(geturls_text.TIMEOUT, timeout);
        geturls_text.perform();
    elif(httpmethod=="POST"):
        geturls_text = pycurl.Curl();
        geturls_text.setopt(geturls_text.URL, httpurl);
        geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
        geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
        geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
        geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
        geturls_text.setopt(geturls_text.TIMEOUT, timeout);
        geturls_text.setopt(geturls_text.POST, True);
        geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
        geturls_text.perform();
    # (dropped "else:") unknown methods fall back to a plain GET
    geturls_text = pycurl.Curl();
    geturls_text.setopt(geturls_text.URL, httpurl);
    geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
    geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
    geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
    geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
    geturls_text.setopt(geturls_text.TIMEOUT, timeout);
    geturls_text.perform();
    retrieved_headers.seek(0);
    if(sys.version[0]=="2"):
        pycurlhead = retrieved_headers.read();
    if(sys.version[0]>="3"):
        pycurlhead = retrieved_headers.read().decode('UTF-8');
    # Parse "HTTP/x.y CODE [REASON]" from the first status line of the capture.
    pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
    pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
    retrieved_body.seek(0);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        # (dropped body, presumably "return False;")
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        # (dropped body, presumably "return False;")
    # (dropped except header for this error path)
    log.info("Error With URL "+httpurl);
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
    httpversionout = pyhttpverinfo[0];
    httpmethodout = httpmethod;
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
    httpheaderout = pycurlheadersout;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    # Py2: header object -> plain dict rebuild.
    if(sys.version[0]=="2"):
        # (dropped "try:")
        prehttpheaderout = httpheaderout;
        httpheaderkeys = httpheaderout.keys();
        imax = len(httpheaderkeys);
        # (dropped loop header, presumably "for ic in range(imax):")
        httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
    except AttributeError:
        # (dropped handler body)
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    # Content-Length drives the progress percentage; 0 when absent.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # (dropped initializers, presumably "fulldatasize = 0;" / "prevdownsize = 0;")
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # (dropped "while True:" loop header around the chunked read below)
        databytes = retrieved_body.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # (dropped "strbuf.seek(0);" presumably, before reading back)
        returnval_content = strbuf.read();
        # Decode per Content-Encoding; each decompress sat inside a dropped "try:".
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            returnval_content = zlib.decompress(returnval_content);
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
        elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
            returnval_content = lzma.decompress(returnval_content);
        # BUG(review): lzma failures raise lzma.LZMAError, not zstandard.error.
        except zstandard.error:
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            returnval_content = bz2.decompress(returnval_content);
        # BUG(review): bz2 failures raise OSError/ValueError, not zstandard.error.
        except zstandard.error:
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl"};
    geturls_text.close();
    # (dropped "return returnval;")
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback delegating the in-memory download to the urllib backend.

    NOTE(review): the sibling fallbacks in this file are guarded by an
    "if(not have...):" line; the guard for this one (orig line 3916) was lost in
    extraction -- confirm against the undamaged file.

    Returns whatever download_from_url_with_urllib returns (a result dict, or
    False on failure).

    Fix vs the recovered code: returnval was computed but never returned.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download via download_from_url_with_pycurl into a NamedTemporaryFile and
    return a 'Type': "File" result dict describing the temp file.

    NOTE(review): recovered from a damaged extraction -- dropped scaffolding lines
    are marked in comments; code tokens are unchanged.  The *ranges* parameter is
    accepted but never forwarded in the visible call below -- confirm against the
    undamaged file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # (dropped guard, presumably "if(sleep<0):")
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # (dropped body, presumably "return False;")
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified time onto the temp file ("try:" dropped).
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Manual RFC-1123 parse when parsedate_to_datetime is unavailable
            # ("try:" dropped).
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    # NOTE(review): start - end is a negative duration; presumably end - start
    # was intended -- confirm before changing.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # (dropped "return returnval;")
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback delegating the download-to-temp-file operation to the urllib
    backend with the same signature.

    NOTE(review): the sibling fallbacks are guarded by "if(not have...):"; the
    guard for this one (orig line 3961) was lost in extraction -- confirm.

    Returns whatever download_from_url_file_with_urllib returns (a result dict,
    or False on failure).

    Fix vs the recovered code: returnval was computed but never returned.  The
    positional forwarding order matches the visible file-with signatures
    (postdata, ranges, buffersize, sleep, timeout).
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download to *outpath*/*outfile* (or into memory when outfile=="-") using the
    pycurl backend; returns a 'File' or 'Content' result dict.

    NOTE(review): recovered from a damaged extraction -- dropped scaffolding lines
    (try:/else:/while True:/return) are marked in comments; code tokens unchanged.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # (dropped guard, presumably "if(sleep<0):")
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to write when outpath is a regular file or filepath is a dir.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # (dropped body, presumably "return False;")
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # (dropped body, presumably "return False;")
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # (dropped body, presumably "return False;")
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified on the moved file ("try:" dropped).
        os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # Manual RFC-1123 parse fallback ("try:" dropped).
            os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        exec_time_end = time.time();
        # NOTE(review): start - end is negative; presumably end - start intended.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG(review): duplicate 'Method' key below -- the second entry
        # ('Method': httpmethod) silently wins.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # (dropped branch header, presumably 'if(outfile=="-"):' for the in-memory path)
    pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
    tmpfilename = pretmpfilename.get('Filename');
    downloadsize = int(os.path.getsize(tmpfilename));
    # (dropped initializers, presumably "fulldatasize = 0;" / "prevdownsize = 0;")
    exec_time_start = time.time();
    with open(tmpfilename, 'rb') as ft:
        # (dropped lines, presumably "f = BytesIO();" and a "while True:" header)
        databytes = ft.read(buffersize[1]);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        # NOTE(review): f is not defined in the visible lines -- presumably a
        # BytesIO created in a dropped line; confirm against the undamaged file.
        fdata = f.getvalue();
    os.remove(tmpfilename);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
    # BUG(review): duplicate 'Method' key below as well.
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    # (dropped "return returnval;")
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub used when pycurl is unavailable: serve the request with
    the urllib implementation instead.

    NOTE(review): `ranges` is accepted but not forwarded, and the positional
    order passed here (postdata, buffersize, outfile, outpath) should be
    confirmed against download_from_url_to_file_with_urllib's signature.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/2 and return the content.

        Returns a dict with keys Type/Content/Contentsize/ContentsizeAlt/
        Headers/Version/Method/HeadersSent/URL/Code/Reason/HTTPLib, or False
        on socket timeout, address-resolution failure, or a ValueError while
        performing the transfer.

        BUGFIXES vs. previous revision:
        - the User-Agent/Referer fallback updated `httpuseragent` (a str,
          which has no .update and is the wrong target) instead of
          `httpheaders`;
        - the lzma and bzip2 decompression fallbacks caught
          `zstandard.error` instead of the exceptions those codecs raise.
        NOTE(review): some control-flow lines were unreadable in the reviewed
        copy and were restored from the sibling pycurl/pycurl3 variants —
        verify against upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object (a str).
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object (a str).
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline credentials in the URL become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            # pycurl wants headers as a list of "Name: value" strings.
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.perform()
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
                geturls_text.perform()
            else:
                # Any other method falls back to a plain GET-style transfer.
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            # First header line is the status line, e.g. "HTTP/2 200 OK".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip())[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0]=="2"):
            # Python 2: rebuild the mapping key by key to get a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None: downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently undo any Content-Encoding the server applied;
        # decompression failures leave the raw bytes in place (best effort).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:  # BUGFIX: was zstandard.error
                pass
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):  # BUGFIX: was zstandard.error
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl2"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub used when pycurl is unavailable: serve the request with
    the urllib implementation instead."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """pycurl is installed but this libcurl lacks HTTP/2: fall back to the
        plain pycurl implementation.

        BUGFIX: previously delegated to download_from_url_with_urllib even
        though pycurl is available — inconsistent with the sibling fallbacks
        (the file/to_file HTTP/2 stubs and the pycurl3 stubs all fall back to
        the best available pycurl implementation).
        """
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl (HTTP/2 via pycurl) into a uniquely-named temporary
        file and return a 'File' result dict, or False when the fetch failed.

        ROBUSTNESS FIX: the Last-Modified handling previously caught only
        AttributeError; when the header is missing, parsedate_to_datetime(None)
        and strptime(None, ...) raise TypeError, which escaped.  TypeError is
        now handled alongside the existing exceptions.
        NOTE(review): some control-flow lines were unreadable in the reviewed
        copy and were restored from the sibling variants — verify upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Hash URL + buffer size + start time into a unique temp-file suffix.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # Mirror the server's Last-Modified time onto the temp file.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except (AttributeError, TypeError):
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (ValueError, TypeError):
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        # Refresh size fields from the on-disk file and record timing.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub used when pycurl is unavailable: download to a file via
    the urllib implementation instead."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """No HTTP/2 in this libcurl: use the plain pycurl file downloader."""
        return download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl (HTTP/2 via pycurl) to outpath/outfile, or to an
        in-memory buffer when outfile == "-".

        Returns a 'File' or 'Content' result dict, or False on failure.
        buffersize is [download_chunk, copy_chunk].

        BUGFIX: both result dict literals previously contained the 'Method'
        key twice ('Method': pretmpfilename.get('Method'), 'Method':
        httpmethod) — the first entry was silently discarded; the duplicate
        is removed, keeping the value that previously won (httpmethod).
        Last-Modified handling now also tolerates a missing header
        (TypeError), matching the AttributeError/ValueError paths.
        NOTE(review): some control-flow lines were unreadable in the reviewed
        copy and were restored from the sibling variants — verify upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        if(not outfile=="-"):
            # Real-file branch: fetch to a temp file, then move into place.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Preserve the server's Last-Modified time on the final file.
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except (AttributeError, TypeError):
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (ValueError, TypeError):
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile=="-"):
            # In-memory branch: copy the temp file into a BytesIO buffer.
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with BytesIO() as f:
                with open(tmpfilename, 'rb') as ft:
                    while True:
                        databytes = ft.read(buffersize[1])
                        if not databytes: break
                        datasize = len(databytes)
                        fulldatasize = datasize + fulldatasize
                        percentage = ""
                        if(downloadsize>0):
                            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                        downloaddiff = fulldatasize - prevdownsize
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                        prevdownsize = fulldatasize
                        f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback stub used when pycurl is unavailable: delegate to the urllib
    implementation.

    NOTE(review): `ranges` is accepted but not forwarded; confirm the
    positional order against download_from_url_to_file_with_urllib.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """No HTTP/2 in this libcurl: use the plain pycurl to-file downloader.

        BUGFIX: the delegation previously passed
        (postdata, buffersize, outfile, outpath, sleep, timeout) positionally,
        which misbinds against download_from_url_to_file_with_pycurl's
        signature (..., postdata, outfile, outpath, ranges, buffersize,
        sleep, timeout): buffersize landed in outfile, outfile in outpath,
        sleep in buffersize, and timeout was dropped.  Arguments are now
        passed in the callee's declared order.
        """
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl with pycurl forced to HTTP/3 and return the content.

        Returns a dict with keys Type/Content/Contentsize/ContentsizeAlt/
        Headers/Version/Method/HeadersSent/URL/Code/Reason/HTTPLib, or False
        on socket timeout, address-resolution failure, or a ValueError while
        performing the transfer.

        BUGFIXES vs. previous revision (same as the HTTP/2 twin):
        - the User-Agent/Referer fallback updated `httpuseragent` (a str)
          instead of `httpheaders`;
        - the lzma and bzip2 decompression fallbacks caught
          `zstandard.error` instead of the exceptions those codecs raise.
        NOTE(review): some control-flow lines were unreadable in the reviewed
        copy and were restored from the sibling variants — verify upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object (a str).
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUGFIX: was httpuseragent.update(...) — wrong object (a str).
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline credentials in the URL become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            # pycurl wants headers as a list of "Name: value" strings.
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.perform()
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
                geturls_text.perform()
            else:
                # Any other method falls back to a plain GET-style transfer.
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            # First header line is the status line, e.g. "HTTP/3 200".
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0]=="2"):
            # Python 2: rebuild the mapping key by key to get a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None: downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently undo any Content-Encoding the server applied;
        # decompression failures leave the raw bytes in place (best effort).
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="lzma" and havelzma):
            try:
                returnval_content = lzma.decompress(returnval_content)
            except lzma.LZMAError:  # BUGFIX: was zstandard.error
                pass
        elif(httpheaderout.get("Content-Encoding")=="bzip2"):
            try:
                returnval_content = bz2.decompress(returnval_content)
            except (OSError, ValueError):  # BUGFIX: was zstandard.error
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason, 'HTTPLib': "pycurl3"}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback stub used when pycurl is unavailable: serve the request with
    the urllib implementation instead."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """No HTTP/3 in this libcurl, but HTTP/2 is available: use it."""
        return download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Neither HTTP/3 nor HTTP/2 in this libcurl: plain pycurl download."""
        return download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl (HTTP/3 via pycurl) into a uniquely-named temporary
        file and return a 'File' result dict, or False when the fetch failed.

        ROBUSTNESS FIX (same as the HTTP/2 twin): the Last-Modified handling
        previously caught only AttributeError; a missing header made
        parsedate_to_datetime(None)/strptime(None, ...) raise TypeError
        uncaught.  TypeError is now handled alongside the existing exceptions.
        NOTE(review): some control-flow lines were unreadable in the reviewed
        copy and were restored from the sibling variants — verify upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Hash URL + buffer size + start time into a unique temp-file suffix.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep < 0):
            sleep = geturls_download_sleep
        if(timeout <= 0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # Mirror the server's Last-Modified time onto the temp file.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except (AttributeError, TypeError):
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (ValueError, TypeError):
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
            f.write(pretmpfilename.get('Content'))
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
        # Refresh size fields from the on-disk file and record timing.
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
        return returnval
# NOTE(review): the guard line is missing from this garbled chunk; inferred from the
# sibling fallback definitions below (4575/4580) — confirm against upstream.
if(not havepycurl):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback alias: pycurl is unavailable, so delegate to the urllib implementation."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback alias: this pycurl build has HTTP/2 but no HTTP/3, so the
        pycurl3 entry point simply delegates to the pycurl2 implementation."""
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback alias: this pycurl build has neither HTTP/2 nor HTTP/3, so the
        pycurl3 entry point delegates to the plain pycurl implementation."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl via pycurl (HTTP/3) to outfile under outpath, or return
        the raw content in the result dict when outfile=="-".

        Returns a result dict on success, False on failure.
        NOTE(review): reconstructed from a garbled dump; gap lines (guards, returns,
        loop headers) restored from the repeated pattern in this file — verify."""
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        if(not outfile=="-"):
            # Download into a temp file, then move it into its final place.
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            # Mirror the server's Last-Modified time on the moved file when present.
            headers = pretmpfilename.get('Headers')
            lastmod = headers.get('Last-Modified') if headers else None
            if(lastmod is not None):
                try:
                    modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
                    os.utime(filepath, (modtime, modtime))
                except (AttributeError, ValueError):
                    try:
                        modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                        os.utime(filepath, (modtime, modtime))
                    except (TypeError, ValueError):
                        pass
            exec_time_end = time.time()
            # Fixed: elapsed time is end - start (was start - end, always negative).
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # Fixed: the dict had 'Method' twice; only the later value (httpmethod)
            # ever took effect, so the duplicate is dropped.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        if(outfile=="-"):
            # Download to a temp file, then stream its bytes back as the result.
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename.get('Filename')
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            with BytesIO() as f:
                with open(tmpfilename, 'rb') as ft:
                    while True:
                        databytes = ft.read(buffersize[1])
                        if not databytes:
                            break
                        datasize = len(databytes)
                        fulldatasize = datasize + fulldatasize
                        percentage = ""
                        if(downloadsize>0):
                            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                        downloaddiff = fulldatasize - prevdownsize
                        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                        prevdownsize = fulldatasize
                        f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': httpmethod, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        return returnval
# NOTE(review): the guard line is missing from this garbled chunk; inferred from the
# sibling fallback definitions (4660/4665) — confirm against upstream.
if(not havepycurl):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback alias: pycurl is unavailable, so delegate to the urllib implementation."""
        # NOTE(review): the positional order here (buffersize before outfile/outpath,
        # no ranges) does not match the to_file signatures visible in this file —
        # confirm the urllib target's parameter order before relying on it.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback alias: HTTP/2 is available but HTTP/3 is not, so the pycurl3
        entry point delegates to the pycurl2 implementation.

        Fixed: the original defined download_from_url_to_file_with_pycurl2 calling
        itself — unbounded recursion that also shadowed the real pycurl2 function.
        Per the sibling pattern at 4576 this block defines the pycurl3 alias."""
        # NOTE(review): positional order (buffersize before outfile/outpath, no
        # ranges) kept as-is; confirm the pycurl2 target's parameter order.
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback alias: neither HTTP/2 nor HTTP/3 is available, so the pycurl3
        entry point delegates to the plain pycurl implementation.

        Fixed: the original defined download_from_url_to_file_with_pycurl calling
        itself — unbounded recursion that also shadowed the real base function.
        Per the sibling pattern at 4581 this block defines the pycurl3 alias."""
        # NOTE(review): positional order kept as-is; confirm the pycurl target's
        # parameter order.
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
def download_file_from_ftp_file(url):
    """Fetch url over FTP/FTPS and return its contents as a rewound BytesIO.

    Returns False for non-FTP schemes or on connection errors.
    NOTE(review): reconstructed from a garbled dump; gap lines (FTP()/FTP_TLS()
    constructors, returns, default port) restored from the repeated pattern."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    # NOTE(review): ftp_username/ftp_password are computed but the visible login
    # call uses urlparts.username/password directly — kept as-is.
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # Fixed: the log line referenced the undefined name httpurl (NameError);
        # this function's parameter is url.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        # Switch the data connection to TLS after login.
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Return the remote FTP file's contents as bytes."""
    handle = download_file_from_ftp_file(url)
    return handle.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl over FTP and return a result dict with its content (False on failure).

    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fixed: was httpuseragent.update({...}) — str has no .update, and the
            # header was never added to httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # Fixed: same mistake — was httpuseragent.update({...}).
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    # FTP gives no Content-Length up front; 0 disables the percentage display.
    downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading back, otherwise .read() returns empty bytes.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over FTP into a uniquely-named temporary file.

    Returns a result dict describing the temp file, or False on failure.
    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    # Fixed: set the Last-Modified timestamp only after the file is written and
    # closed — the original called os.utime before f.write, so the write clobbered
    # the timestamp. FTP results carry no headers, so a missing value is skipped.
    headers = pretmpfilename.get('Headers')
    lastmod = headers.get('Last-Modified') if headers else None
    if(lastmod is not None):
        try:
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (modtime, modtime))
        except (AttributeError, ValueError):
            try:
                modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (modtime, modtime))
            except (TypeError, ValueError):
                pass
    exec_time_end = time.time()
    # Fixed: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over FTP to outfile under outpath; with outfile=="-" return
    the content in the result dict instead.

    Returns a result dict on success, False on failure.
    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        # Download into a temp file, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # Fixed: elapsed time is end - start (was start - end, always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # Fixed: the dict had 'Method' twice; only the later value (None) ever took
        # effect, so the duplicate is dropped.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        # Download to a temp file, then stream its bytes back as the result.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to url over FTP/FTPS.

    Returns the rewound ftpfile on success, False for non-FTP schemes or on
    connection errors.
    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # Fixed: the log line referenced the undefined name httpurl (NameError);
        # this function's parameter is url.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        # Switch the data connection to TLS after login.
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the bytes ftpstring to url over FTP; returns the helper's result."""
    buf = BytesIO(ftpstring)
    result = upload_file_to_ftp_file(buf, url)
    buf.close()
    return result
def download_file_from_sftp_file(url):
    """Fetch url over SFTP (paramiko) and return its contents as a rewound BytesIO.

    Returns False for non-sftp schemes or on connection errors.
    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # Fixed: the log line referenced the undefined name httpurl (NameError);
        # this function's parameter is url.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
# NOTE(review): the guard line is missing from this garbled chunk; inferred from
# the sibling no-paramiko stubs — confirm against upstream.
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        """Stub used when paramiko is unavailable; always fails."""
        return False
def download_file_from_sftp_string(url):
    """Return the remote SFTP file's contents as bytes."""
    handle = download_file_from_sftp_file(url)
    return handle.read()
# NOTE(review): the guard line is missing from this garbled chunk; inferred from
# the sibling no-paramiko stubs — confirm against upstream.
if(not haveparamiko):
    def download_file_from_sftp_string(url):
        """Stub used when paramiko is unavailable; always fails.

        Fixed: the original defined download_file_from_ftp_string here — a
        copy-paste slip that shadowed the working FTP helper whenever paramiko
        was missing."""
        return False
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl over SFTP and return a result dict with its content (False on failure).

    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fixed: was httpuseragent.update({...}) — str has no .update, and the
            # header was never added to httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # Fixed: same mistake — was httpuseragent.update({...}).
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    # SFTP gives no Content-Length up front; 0 disables the percentage display.
    downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading back, otherwise .read() returns empty bytes.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stub used when paramiko is unavailable; always fails."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl over SFTP into a uniquely-named temporary file.

    Returns a result dict describing the temp file, or False on failure.
    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
        f.write(pretmpfilename.get('Content'))
    # Fixed: set the Last-Modified timestamp only after the file is written and
    # closed — the original called os.utime before f.write, so the write clobbered
    # the timestamp. SFTP results carry no headers, so a missing value is skipped.
    headers = pretmpfilename.get('Headers')
    lastmod = headers.get('Last-Modified') if headers else None
    if(lastmod is not None):
        try:
            modtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            os.utime(tmpfilename, (modtime, modtime))
        except (AttributeError, ValueError):
            try:
                modtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (modtime, modtime))
            except (TypeError, ValueError):
                pass
    exec_time_end = time.time()
    # Fixed: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stub used when paramiko is unavailable; always fails."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl over SFTP to outfile under outpath; with outfile=="-" return
    the content in the result dict instead.

    Returns a result dict on success, False on failure.
    NOTE(review): reconstructed from a garbled dump; gap lines restored from the
    repeated pattern in this file — verify against upstream."""
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(timeout<=0):
        timeout = 10
    if(not outfile=="-"):
        # Download into a temp file, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # Fixed: elapsed time is end - start (was start - end, always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # Fixed: the dict had 'Method' twice; only the later value (None) ever took
        # effect, so the duplicate is dropped.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    if(outfile=="-"):
        # Download to a temp file, then stream its bytes back as the result.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename.get('Filename')
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with BytesIO() as f:
            with open(tmpfilename, 'rb') as ft:
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes:
                        break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': None, 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')}
    return returnval
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub installed when the paramiko module is unavailable.

        Mirrors the real download_from_url_to_file_with_sftp() signature but
        always reports failure, so callers can test the result for False.
        """
        return False;
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object *sftpfile* to the path named by an
    sftp:// *url* using paramiko.

    Returns the rewound *sftpfile* on success, False on any failure
    (non-sftp scheme, SSH/DNS/timeout errors).
    NOTE(review): source chunk was whitespace-mangled; reformatted and
    missing short lines (return False / defaults) restored per the file's
    repeated pattern -- verify against upstream pywwwgetold.py.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    # Only sftp:// URLs are supported by this helper.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port when the URL carries none
    else:
        sftp_port = urlparts.port;
    # NOTE(review): sftp_username/sftp_password are computed but the connect
    # call below uses urlparts.username/password directly, matching the
    # sibling helpers in this file.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name "httpurl"
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name "httpurl"
        return False;
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;
if(not haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        """Fallback stub when paramiko is unavailable; SFTP uploads always fail."""
        return False;
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the bytes *sftpstring* to an sftp:// *url*.

    Wraps the bytes in a BytesIO buffer and delegates to
    upload_file_to_sftp_file(); returns that helper's result (the buffer on
    success, False on failure).
    NOTE(review): the buffer is closed before returning, matching the FTP
    sibling helpers in this file -- callers only test the result for truth.
    """
    sftpfileo = BytesIO(sftpstring);
    # BUGFIX: was "upload_file_to_sftp_files(ftpfileo, url)" -- both the
    # function name and the buffer name were typos (NameError at runtime).
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
if(not haveparamiko):
    # BUGFIX: the stub previously took only (url); the real implementation
    # takes (sftpstring, url), so callers would hit TypeError instead of
    # receiving the documented False.
    def upload_file_to_sftp_string(sftpstring, url):
        """Fallback stub when paramiko is unavailable; always fails."""
        return False;
def download_file_from_pysftp_file(url):
    """Download the path named by an sftp:// *url* via pysftp.

    Returns a rewound BytesIO holding the remote file's bytes on success,
    False on any failure (non-sftp scheme, SSH/DNS/timeout errors).
    NOTE(review): source chunk was whitespace-mangled; reformatted and
    missing short lines restored per the file's repeated pattern.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # Only sftp:// URLs are supported by this helper.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port when the URL carries none
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the connection object was discarded and an undefined "ssh"
        # variable was used below; pysftp.Connection IS the SFTP client.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name "httpurl"
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name "httpurl"
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
if(not havepysftp):
    def download_file_from_pysftp_file(url):
        """Fallback stub when pysftp is unavailable; SFTP downloads always fail."""
        return False;
def download_file_from_pysftp_string(url):
    """Return the remote file's bytes for an sftp:// *url*, or False on failure."""
    sftpfile = download_file_from_pysftp_file(url);
    # BUGFIX: download_file_from_pysftp_file() returns False on failure;
    # previously this crashed with AttributeError on False.read().
    if(not sftpfile):
        return False;
    return sftpfile.read();
if(not havepysftp):
    # BUGFIX: this stub was named download_file_from_ftp_string, which would
    # clobber the real FTP helper defined earlier in the file instead of
    # stubbing the pysftp one.
    def download_file_from_pysftp_string(url):
        """Fallback stub when pysftp is unavailable; always fails."""
        return False;
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch an sftp:// *httpurl* via pysftp and return a result dict with the
    downloaded bytes under 'Content' ('Headers'/'Code' are None for SFTP),
    or False on failure.

    httpheaders/httpcookie/httpmethod/postdata are accepted for interface
    parity with the HTTP downloaders; only the headers are normalized here.
    NOTE(review): source chunk was whitespace-mangled; reformatted and
    missing short lines restored per the file's repeated pattern.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        return False;
    downloadsize = None;  # SFTP exposes no Content-Length equivalent here
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            # BUGFIX: downloadsize is 0 on this path; computing the percentage
            # unconditionally would raise ZeroDivisionError.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
if(not havepysftp):
    def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub when pysftp is unavailable; SFTP downloads always fail."""
        return False;
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// *httpurl* to a uniquely named temporary file.

    Returns a result dict describing the file ('Filename', 'Filesize',
    'DownloadTime', ...) or False on failure. The temp file name embeds a
    SHA-1 of the URL/buffersize/start-time so concurrent downloads don't
    collide.
    NOTE(review): source chunk was whitespace-mangled; reformatted and
    missing short lines restored per the file's repeated pattern.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    # BUGFIX: previously passed undefined names httpuseragent/httpreferer;
    # download_from_url_with_pysftp() takes neither parameter.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Best-effort: stamp the temp file with the server's Last-Modified.
        # On the SFTP path 'Headers' is None, so fall through silently.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            # BUGFIX: the fallback re-raises AttributeError/TypeError when
            # Headers is None (always true for SFTP); swallow those too.
            except (AttributeError, TypeError, ValueError):
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
        f.write(pretmpfilename.get('Content'));
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
if(not havepysftp):
    def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback stub when pysftp is unavailable; SFTP downloads always fail."""
        return False;
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an sftp:// *httpurl* via pysftp.

    When outfile is a name, the download is moved to outpath/outfile and a
    'File' result dict is returned; when outfile=="-", the content is read
    into memory and a 'Content' result dict is returned. Returns False on
    any failure.
    NOTE(review): source chunk was whitespace-mangled; reformatted and
    missing short lines restored per the file's repeated pattern.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Persist the download under outpath/outfile.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice; the second entry
        # ('Method': None) silently overwrote the real method value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    if(outfile=="-"):
        # Read the downloaded temp file back into memory and delete it.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # BUGFIX: this branch lacked the failure guard the other branch has.
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename.get('Filename');
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename.get('DownloadTime'), 'DownloadTimeReadable': pretmpfilename.get('DownloadTimeReadable'), 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason'), 'HTTPLib': pretmpfilename.get('HTTPLib')};
    return returnval;
if(not havepysftp):
    def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback stub when pysftp is unavailable; SFTP downloads always fail."""
        return False;
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object *sftpfile* to the path named by an
    sftp:// *url* using pysftp.

    Returns the rewound *sftpfile* on success, False on any failure
    (non-sftp scheme, SSH/DNS/timeout errors).
    NOTE(review): source chunk was whitespace-mangled; reformatted and
    missing short lines restored per the file's repeated pattern.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    # Only sftp:// URLs are supported by this helper.
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port when the URL carries none
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the connection object was discarded and an undefined "ssh"
        # variable was used below; pysftp.Connection IS the SFTP client.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name "httpurl"
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name "httpurl"
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
if(not havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        """Fallback stub when pysftp is unavailable; SFTP uploads always fail."""
        return False;
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the bytes *sftpstring* to an sftp:// *url* via pysftp.

    Wraps the bytes in a BytesIO buffer and delegates to
    upload_file_to_pysftp_file(); returns that helper's result (the buffer
    on success, False on failure).
    NOTE(review): the buffer is closed before returning, matching the FTP
    sibling helpers in this file -- callers only test the result for truth.
    """
    sftpfileo = BytesIO(sftpstring);
    # BUGFIX: was "upload_file_to_pysftp_files(ftpfileo, url)" -- both the
    # function name and the buffer name were typos (NameError at runtime).
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
if(not havepysftp):
    # BUGFIX: the stub previously took only (url); the real implementation
    # takes (sftpstring, url), so callers would hit TypeError instead of
    # receiving the documented False.
    def upload_file_to_pysftp_string(sftpstring, url):
        """Fallback stub when pysftp is unavailable; always fails."""
        return False;