4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
62 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
96 if(sys
.version
[0]=="2"):
98 from io
import StringIO
, BytesIO
;
101 from cStringIO
import StringIO
;
102 from cStringIO
import StringIO
as BytesIO
;
104 from StringIO
import StringIO
;
105 from StringIO
import StringIO
as BytesIO
;
106 # From http://python-future.org/compatible_idioms.html
107 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
108 from urllib
import urlencode
;
109 from urllib
import urlopen
as urlopenalt
;
110 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
111 import urlparse
, cookielib
;
112 from httplib
import HTTPConnection
, HTTPSConnection
;
113 if(sys
.version
[0]>="3"):
114 from io
import StringIO
, BytesIO
;
115 # From http://python-future.org/compatible_idioms.html
116 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
117 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
118 from urllib
.error
import HTTPError
, URLError
;
119 import urllib
.parse
as urlparse
;
120 import http
.cookiejar
as cookielib
;
121 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Program identity and version metadata; also used to build User-Agent strings.
__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
# (major, minor, micro, release tag, rc number)
__version_info__ = (2, 0, 2, "RC 1", 1);
# (year, month, day, release tag, rc number)
__version_date_info__ = (2023, 10, 5, "RC 1", 1);
# "YYYY.MM.DD" with zero-padded month/day.
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id: 346873973981d4271d1530c99011ffb8047a7d18 $";
# Append "-<rc>" to the date string when an RC number is present.
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__;
# Human-readable version: "2.0.2 RC 1" when a release tag is present.
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
# Prefix used when creating temporary download files, e.g. "py3wwwget2-".
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
pytempdir = tempfile.gettempdir();
# NOTE(review): platform.architecture() returns a tuple like ('64bit', ''),
# so the string comparisons below can never match — presumably this was meant
# to be platform.architecture()[0]; confirm against the full file.
PyBitness = platform.architecture();
if(PyBitness=="32bit" or PyBitness=="32"):
# NOTE(review): the body of this branch is missing from this chunk.
elif(PyBitness=="64bit" or PyBitness=="64"):
# NOTE(review): the body of this branch and the following lines are missing from this chunk.
# Advertised Accept-Encoding value, widened below when brotli/zstd are available.
compression_supported = "gzip, deflate";
if(havebrotli and not havezstd):
    compression_supported = "gzip, deflate, br";
elif(not havebrotli and havezstd):
    compression_supported = "gzip, deflate, zstd";
elif(havebrotli and havezstd):
    compression_supported = "gzip, deflate, zstd, br";
# NOTE(review): an "else:" header appears to be missing from this chunk
# immediately before the next line.
    compression_supported = "gzip, deflate";
# Shared cookie jar used by all download functions.
geturls_cj = cookielib.CookieJar();
# Windows platform strings and matching Client Hint (SEC-CH-UA-*) header sets.
# Bug fix: each *_ua_addon dict listed the key 'SEC-CH-UA-PLATFORM' twice, so
# the platform name ("Windows") was silently overwritten by the version number
# (duplicate dict keys keep only the last value).  The second entry is the
# platform *version* and belongs under 'SEC-CH-UA-PLATFORM-VERSION'.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): "5.1.0" looks like it should be "5.2.0" for NT 5.2 — value left as-is.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Canned User-Agent strings for common browsers on Windows 7.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Self-identifying User-Agent for this program.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Interpreter name ("CPython", "PyPy", ...); fall back to "Python" when empty.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used when none is chosen explicitly.
geturls_ua = geturls_ua_firefox_windows7;
# Full request-header sets, one per emulated browser.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Header sets for this program's own identity (duplicate-key fix applied here too).
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Default header set and default inter-download sleep (seconds).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit *dbgtxt* through the channel named by *outtype*.

    outtype selects print/log/warning/error/critical/exception/logalt/debug;
    dbgenable gates all output; dgblevel is the numeric level for "logalt".
    NOTE(review): the branch bodies for "print" and the per-branch return
    statements are missing from this chunk; documented from visible lines only.
    """
    if(outtype=="print" and dbgenable):
    # NOTE(review): body of the "print" branch is missing from this chunk.
    elif(outtype=="log" and dbgenable):
        logging.info(dbgtxt);
    elif(outtype=="warning" and dbgenable):
        logging.warning(dbgtxt);
    elif(outtype=="error" and dbgenable):
        logging.error(dbgtxt);
    elif(outtype=="critical" and dbgenable):
        logging.critical(dbgtxt);
    elif(outtype=="exception" and dbgenable):
        logging.exception(dbgtxt);
    elif(outtype=="logalt" and dbgenable):
        # Caller-chosen numeric level (default 20 == logging.INFO).
        logging.log(dgblevel, dbgtxt);
    elif(outtype=="debug" and dbgenable):
        logging.debug(dbgtxt);
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit *dbgtxt* via verbose_printout() and then return a value.

    NOTE(review): the tail of this function (the return logic) is missing
    from this chunk; presumably it returns dbgtxt on success — confirm.
    """
    dbgout = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel);
def add_url_param(url, **params):
    """Return *url* with the keyword arguments merged into its query string.

    NOTE(review): the initialization of index ``n`` (the query component of
    the urlsplit result, presumably n=3) and the ``d.update(params)`` merge
    step are missing from this chunk; documented from visible lines only.
    """
    parts = list(urlparse.urlsplit(url));
    # Parse the existing query component into a dict of scalars.
    d = dict(cgi.parse_qsl(parts[n])); # use cgi.parse_qs for list values
    # Re-encode the (merged) parameters back into the query component.
    parts[n]=urlencode(d);
    return urlparse.urlunsplit(parts);
# Extend the executable search PATH with this script's directory and the
# current working directory so which_exec() can find helpers shipped
# alongside the program.  (Same resulting string as the original
# concatenation: PATH + sep + script dir + sep + cwd.)
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], os.path.dirname(os.path.realpath(__file__)), os.getcwd()]);
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None when the executable is not found on the search path.
    """
    # Bug fix: the original split PATH on a literal ":", which is wrong on
    # Windows; os.pathsep is the portable separator (identical on POSIX).
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = path + "/" + execfile
        if os.path.exists(candidate):
            return candidate
    # Made the not-found result explicit instead of an implicit None.
    return None
def listize(varlist):
    """Build forward (index -> value) and reverse (value -> index) lookup
    dicts from *varlist* and combine them into one table.

    NOTE(review): the loop header, counter initialization/increments and the
    return statement are missing from this chunk; documented from visible
    lines only.
    """
    # Loop body (enclosing loop header not visible in this chunk).
        newlistreg.update({ilx: varlist[il]});
        newlistrev.update({varlist[il]: ilx});
    # Combined table addressable by 1/'reg' (forward) and 2/'rev' (reverse).
    newlistfull = {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev};
def twolistize(varlist):
    """Like listize(), but for a list of (name, description) pairs: builds
    forward and reverse lookup dicts for both the names and the descriptions.

    NOTE(review): loop header, counter initialization/increments and the
    return statement are missing from this chunk; documented from visible
    lines only.
    """
    # Loop body (enclosing loop header not visible in this chunk).
        newlistnamereg.update({ilx: varlist[il][0].strip()});
        newlistnamerev.update({varlist[il][0].strip(): ilx});
        newlistdescreg.update({ilx: varlist[il][1].strip()});
        newlistdescrev.update({varlist[il][1].strip(): ilx});
    # Per-field tables addressable by 1/'reg' and 2/'rev'.
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev};
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev};
    # Combined table addressable by 1/'name' and 2/'desc'.
    newlistfull = {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp}
def arglistize(proexec, *varlist):
    """Flatten (option, value) pairs from *varlist* into an argv-style list
    headed by the executable name *proexec*, skipping None entries.

    NOTE(review): the loop header, counter handling and the return statement
    are missing from this chunk; documented from visible lines only.
    """
    newarglist = [proexec];
    # Loop body (enclosing loop header not visible in this chunk): append the
    # option token and its value when each is present.
        if varlist[il][0] is not None:
            newarglist.append(varlist[il][0]);
        if varlist[il][1] is not None:
            newarglist.append(varlist[il][1]);
def fix_header_names(header_dict):
    """Return *header_dict* with every header name normalized to Title-Case
    (e.g. "content-type" -> "Content-Type"); values are left untouched.

    Works on both Python 2 (iteritems) and Python 3 (items).
    """
    if(sys.version[0]=="2"):
        header_dict = {k.title(): v for k, v in header_dict.iteritems()}
    if(sys.version[0]>="3"):
        header_dict = {k.title(): v for k, v in header_dict.items()}
    # Fix: the rebuilt dict must be returned — rebinding the parameter has no
    # effect visible to the caller.
    return header_dict
329 # hms_string by ArcGIS Python Recipes
330 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / 3600)
    # Minutes are the whole minutes left over after removing full hours.
    leftover = sec_elapsed % 3600
    minutes = int(leftover / 60)
    # Seconds keep their fractional part (modulo against a float).
    seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
337 # get_readable_size by Lipis
338 # http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count to a human-readable size.

    Returns a dict with 'Bytes', 'ReadableWithSuffix', 'ReadableWithoutSuffix'
    and 'ReadableSuffix' keys.  unit selects IEC (KiB...) or SI (kB...) names.
    NOTE(review): the unit-division loop, ``unitsize``/``orgbytes`` setup and
    the return statements are missing from this chunk; documented from
    visible lines only.  (Parameter name shadows the builtin ``bytes``.)
    """
    # Presumably falls back to a default unit system here — body missing.
    if(unit!="IEC" and unit!="SI"):
    # IEC (binary, 1024-based) suffixes, with and without a leading space.
    units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
    unitswos = ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
    # SI (decimal, 1000-based) suffixes.
    units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
    unitswos = ["B","kB","MB","GB","TB","PB","EB","ZB"];
    # Inside the (missing) per-unit loop: emit once the value fits the unit.
    if abs(bytes) < unitsize:
        strformat = "%3."+str(precision)+"f%s";
        pre_return_val = (strformat % (bytes, unit));
        # Strip padding zeros/dots left by the fixed-width %3.Nf format.
        pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
        pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
        alt_return_val = pre_return_val.split();
        return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
    # Fallback for values beyond the largest listed unit: report in YiB.
    strformat = "%."+str(precision)+"f%s";
    pre_return_val = (strformat % (bytes, "YiB"));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for the file *infile*, optionally
    augmented with hex digests for each hash named in *usehashtypes*.

    NOTE(review): the ``usehashes`` guard, counter initialization/increment
    and the return statement are missing from this chunk; the visible lines
    also never close ``openfile`` — confirm against the full file.
    """
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    hashtypelist = usehashtypes.split(",");
    openfile = open(infile, "rb");
    filecontents = openfile.read();
    listnumend = len(hashtypelist);
    # One digest per requested hash algorithm, keyed by its upper-cased name.
    while(listnumcount < listnumend):
        hashtypelistlow = hashtypelist[listnumcount].strip();
        hashtypelistup = hashtypelistlow.upper();
        filehash = hashlib.new(hashtypelistup);
        filehash.update(filecontents);
        filegethash = filehash.hexdigest();
        return_val.update({hashtypelistup: filegethash});
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for the length of *instring*,
    optionally augmented with hex digests of the string itself.

    NOTE(review): the ``usehashes`` guard, counter initialization/increment
    and the return statement are missing from this chunk; documented from
    visible lines only.
    """
    usehashtypes = usehashtypes.lower();
    getfilesize = len(instring);
    return_val = get_readable_size(getfilesize, precision, unit);
    hashtypelist = usehashtypes.split(",");
    listnumend = len(hashtypelist);
    while(listnumcount < listnumend):
        hashtypelistlow = hashtypelist[listnumcount].strip();
        hashtypelistup = hashtypelistlow.upper();
        filehash = hashlib.new(hashtypelistup);
        # Python 3 hashes bytes, so the text must be encoded first.
        if(sys.version[0]=="2"):
            filehash.update(instring);
        if(sys.version[0]>="3"):
            filehash.update(instring.encode('utf-8'));
        filegethash = filehash.hexdigest();
        return_val.update({hashtypelistup: filegethash});
def http_status_to_reason(code):
    """Map a numeric HTTP status *code* to its standard reason phrase.

    NOTE(review): the ``reasons = {`` opener, several entries and the closing
    brace are missing from this chunk; visible entries reproduced verbatim.
    """
        101: 'Switching Protocols',
        203: 'Non-Authoritative Information',
        205: 'Reset Content',
        206: 'Partial Content',
        208: 'Already Reported',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        402: 'Payment Required',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        511: 'Network Authentication Required'
    # Unknown codes get a generic phrase rather than raising.
    return reasons.get(code, 'Unknown Status Code');
def ftp_status_to_reason(code):
    """Map a numeric FTP reply *code* to its standard reply text.

    NOTE(review): the ``reasons = {`` opener, a few entries and the closing
    brace are missing from this chunk; visible entries reproduced verbatim.
    """
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    # Unknown codes get a generic phrase rather than raising.
    return reasons.get(code, 'Unknown Status Code');
def sftp_status_to_reason(code):
    """Map a numeric SFTP (SSH_FXP) status *code* to its SSH_FX_* name.

    NOTE(review): the ``reasons = {`` opener, the first entries (presumably
    0/1/4) and the closing brace are missing from this chunk.
    """
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    # Unknown codes get a generic phrase rather than raising.
    return reasons.get(code, 'Unknown Status Code');
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict into a list of (name, value) tuples.

    NOTE(review): the ``returnval`` initialization, the list/else branches
    and the return statement are missing from this chunk.  The mutable dict
    default argument is shared across calls — safe only while it is never
    mutated; confirm against the full file.
    """
    if isinstance(headers, dict):
        # Python 2 vs 3 dict-iteration split.
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append((headkey, headvalue));
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append((headkey, headvalue));
    elif isinstance(headers, list):
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict into pycurl-style "Name: value" strings.

    NOTE(review): the ``returnval`` initialization, the list/else branches
    and the return statement are missing from this chunk.  The mutable dict
    default argument is shared across calls; confirm it is never mutated.
    """
    if isinstance(headers, dict):
        # Python 2 vs 3 dict-iteration split.
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append(headkey+": "+headvalue);
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append(headkey+": "+headvalue);
    elif isinstance(headers, list):
def make_http_headers_from_pycurl_to_dict(headers):
    """Parse a raw CRLF-separated pycurl header blob into a dict with
    Title-Cased header names.

    NOTE(review): the ``header_dict`` initialization, the two-part unpack
    guard around the assignment and the return statement are missing from
    this chunk; documented from visible lines only.
    """
    headers = headers.strip().split('\r\n');
    for header in headers:
        # Split only on the first ": " so values may contain colons.
        parts = header.split(': ', 1)
            header_dict[key.title()] = value;
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) tuples into a header dict.

    NOTE(review): the ``returnval`` initialization, loop header, dict/else
    branches and the return statement are missing from this chunk.  The
    mutable list default argument is shared across calls; confirm it is
    never mutated.
    """
    if isinstance(headers, list):
        # Loop body (enclosing loop header not visible in this chunk).
            returnval.update({headers[mli][0]: headers[mli][1]});
    elif isinstance(headers, dict):
def get_httplib_support(checkvalue=None):
    """List the HTTP/FTP/SFTP backend names usable in this environment.

    With *checkvalue* set, checks (after normalizing the urllib1/urllib2 and
    httplib1 aliases) whether that backend is supported.
    NOTE(review): the ``returnval`` initialization, the have* feature guards
    around most appends and the return statements are missing from this
    chunk; documented from visible lines only.
    """
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    returnval.append("ftp");
    returnval.append("httplib");
    returnval.append("httplib2");
    returnval.append("urllib");
    returnval.append("urllib3");
    returnval.append("request3");
    returnval.append("request");
    returnval.append("requests");
    returnval.append("aiohttp");
    returnval.append("httpx");
    returnval.append("httpx2");
    returnval.append("mechanize");
    returnval.append("pycurl");
    # pycurl2/pycurl3 only when libcurl exposes HTTP/2 / HTTP/3 support.
    if(hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        returnval.append("pycurl2");
    if(hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
        returnval.append("pycurl3");
    returnval.append("sftp");
    returnval.append("pysftp");
    if(not checkvalue is None):
        # Normalize legacy aliases before the membership test.
        if(checkvalue=="urllib1" or checkvalue=="urllib2"):
            checkvalue = "urllib";
        if(checkvalue=="httplib1"):
            checkvalue = "httplib";
        if(checkvalue in returnval):
def check_httplib_support(checkvalue="urllib"):
    """Check whether the backend named *checkvalue* is supported, after
    normalizing the urllib1/urllib2 and httplib1 aliases.

    NOTE(review): the return statement is missing from this chunk;
    presumably it returns ``returnval``.
    """
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    if(checkvalue=="httplib1"):
        checkvalue = "httplib";
    returnval = get_httplib_support(checkvalue);
def get_httplib_support_list():
    """Return the full list of supported backends (no membership check).

    NOTE(review): the return statement is missing from this chunk;
    presumably it returns ``returnval``.
    """
    returnval = get_httplib_support(None);
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* using the backend named by *httplibuse*.

    Normalizes backend aliases, downgrades to available backends when the
    requested one is not importable (have* flags), then dispatches to the
    matching download_from_url_with_* implementation.
    NOTE(review): several guard bodies (the sleep<0 check, sftp/pysftp
    fallbacks) and the final return are missing from this chunk; documented
    from visible lines only.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    # Body of a (missing) guard — presumably "if(sleep<0):".
        sleep = geturls_download_sleep;
    # Alias normalization: urllib1/urllib2/request -> urllib, httplib1 -> httplib.
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    # Fall back to urllib (or httplib) when the requested backend is absent.
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not haveaiohttp and httplibuse=="aiohttp"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl2"):
        httplibuse = "urllib";
    # Downgrade pycurl3 -> pycurl2 -> pycurl based on libcurl HTTP/2/3 support.
    if(havepycurl and httplibuse=="pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havepycurl and httplibuse=="pycurl3"):
        httplibuse = "urllib";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2";
    if(havepycurl and httplibuse=="pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
    # NOTE(review): body of this guard is missing from this chunk.
    if(not havepysftp and httplibuse=="pysftp"):
    # NOTE(review): body of this guard is missing from this chunk.
    # Dispatch to the chosen backend implementation.
    if(httplibuse=="urllib" or httplibuse=="request"):
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # NOTE(review): unreachable — "request" was both normalized to "urllib"
    # above and matched by the previous branch.
    elif(httplibuse=="request"):
        returnval = download_from_url_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="request3"):
        returnval = download_from_url_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httplib"):
        returnval = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httplib2"):
        returnval = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # NOTE(review): the "request3" half of this test is unreachable — it was
    # already matched by the elif above.
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="requests"):
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="aiohttp"):
        returnval = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpx"):
        returnval = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpx2"):
        returnval = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore"):
        returnval = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="httpcore2"):
        returnval = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="mechanize"):
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl"):
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl2"):
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pycurl3"):
        returnval = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="ftp"):
        returnval = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="sftp"):
        returnval = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    elif(httplibuse=="pysftp"):
        returnval = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl to a temporary file using the backend named by httplibuse.

    Backend aliases are normalized, unavailable third-party backends fall back
    to urllib (httplib2 falls back to httplib), and pycurl HTTP/2 / HTTP/3
    requests degrade to the best pycurl mode libcurl supports.  Returns the
    selected backend's result dict, or False for an unusable backend.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize backend aliases to their canonical names.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Degrade gracefully when the requested third-party library is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3")):
        httplibuse = "urllib"
    # pycurl2/pycurl3 need HTTP/2 / HTTP/3 support compiled into libcurl.
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    # SFTP backends have no generic fallback: fail when the library is absent.
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    # BUG FIX: the original tested haveparamiko here; pysftp needs havepysftp.
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # Dispatch table replaces the original long elif chain; every handler
    # shares the same positional signature.
    backend_handlers = {
        "urllib": download_from_url_file_with_urllib,
        "request3": download_from_url_file_with_request3,
        "httplib": download_from_url_file_with_httplib,
        "httplib2": download_from_url_file_with_httplib2,
        "urllib3": download_from_url_file_with_urllib3,
        "requests": download_from_url_file_with_requests,
        "aiohttp": download_from_url_file_with_aiohttp,
        "httpx": download_from_url_file_with_httpx,
        "httpx2": download_from_url_file_with_httpx2,
        "httpcore": download_from_url_file_with_httpcore,
        "httpcore2": download_from_url_file_with_httpcore2,
        "mechanize": download_from_url_file_with_mechanize,
        "pycurl": download_from_url_file_with_pycurl,
        "pycurl2": download_from_url_file_with_pycurl2,
        "pycurl3": download_from_url_file_with_pycurl3,
        "ftp": download_from_url_file_with_ftp,
        "sftp": download_from_url_file_with_sftp,
        "pysftp": download_from_url_file_with_pysftp,
    }
    handler = backend_handlers.get(httplibuse)
    if(handler is None):
        # Unknown backend name: mirror the original's False result.
        return False
    return handler(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile (or in-memory when outfile is "-")
    using the backend named by httplibuse.

    Backend aliases are normalized and unavailable libraries fall back to
    urllib/httplib exactly as in download_from_url_file.  Returns the selected
    backend's result dict, or False for an unusable backend.
    """
    global geturls_download_sleep, havezstd, havebrotli, haveaiohttp, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize backend aliases to their canonical names.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Degrade gracefully when the requested third-party library is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not haveaiohttp and httplibuse == "aiohttp"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and (httplibuse == "pycurl" or httplibuse == "pycurl2" or httplibuse == "pycurl3")):
        httplibuse = "urllib"
    # pycurl2/pycurl3 need HTTP/2 / HTTP/3 support compiled into libcurl.
    if(havepycurl and httplibuse == "pycurl2" and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl2"
    if(havepycurl and httplibuse == "pycurl3" and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
        httplibuse = "pycurl"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    # SFTP backends have no generic fallback: fail when the library is absent.
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    # Dispatch table replaces the original long elif chain.
    backend_handlers = {
        "urllib": download_from_url_to_file_with_urllib,
        "request3": download_from_url_to_file_with_request3,
        "httplib": download_from_url_to_file_with_httplib,
        "httplib2": download_from_url_to_file_with_httplib2,
        "urllib3": download_from_url_to_file_with_urllib3,
        "requests": download_from_url_to_file_with_requests,
        "aiohttp": download_from_url_to_file_with_aiohttp,
        "httpx": download_from_url_to_file_with_httpx,
        "httpx2": download_from_url_to_file_with_httpx2,
        "httpcore": download_from_url_to_file_with_httpcore,
        "httpcore2": download_from_url_to_file_with_httpcore2,
        "mechanize": download_from_url_to_file_with_mechanize,
        "pycurl": download_from_url_to_file_with_pycurl,
        "pycurl2": download_from_url_to_file_with_pycurl2,
        "pycurl3": download_from_url_to_file_with_pycurl3,
        "ftp": download_from_url_to_file_with_ftp,
        "sftp": download_from_url_to_file_with_sftp,
        "pysftp": download_from_url_to_file_with_pysftp,
    }
    handler = backend_handlers.get(httplibuse)
    if(handler is None):
        return False
    # BUG FIX: the original httpx/httpx2/httpcore/httpcore2 branches dropped
    # outfile/outpath, shifting ranges into the outfile argument slot; every
    # handler now receives the full, uniform argument list.
    return handler(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib and return a result dict.

    The dict carries the (Content-Encoding-decoded) body plus response
    headers, HTTP version, method, final URL, status code and reason.
    Returns False on URLError/socket.timeout.  HTTP error responses are
    still read, since HTTPError objects expose a body and headers.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: the original called .update() on the httpuseragent
            # string; the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above -- update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become an HTTP Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    # urllib openers take headers as a list of (name, value) tuples.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl)
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(geturls_request)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata)
        else:
            geturls_text = geturls_opener.open(geturls_request)
    except HTTPError as geturls_text_error:
        # Keep the error response: it still has a readable body and headers.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    try:
        httpcodereason = geturls_text.reason
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode())
    try:
        httpversionout = geturls_text.version
    except AttributeError:
        httpversionout = "1.1"
    httpmethodout = geturls_request.get_method()
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if(sys.version[0] == "2"):
        # Python 2 message objects are flattened into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffered body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the advertised Content-Encoding; on decode
    # failure the raw bytes are returned unchanged.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_urllib and spool the body
    into a uniquely-named temporary file.

    Returns a result dict describing the file (name, size, response
    metadata, timing), or False when the underlying download failed.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file;
            # a missing or unparsable header falls through harmlessly.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUG FIX: the original computed start - end, yielding negative durations.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile via the urllib backend.

    When outfile is "-" the content is returned in memory instead of being
    kept on disk.  buffersize is a two-element list: [download chunk size,
    local copy chunk size].  Returns a result dict, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Write to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: durations were computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the original dict listed 'Method' twice; the httpmethod
        # value (which won under Python's last-key-wins rule) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
    else:
        # outfile == "-": copy the temp file into memory and delete it.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: durations were computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed (last-key-wins value kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httplib / http.client and return a result dict.

    The dict carries the (Content-Encoding-decoded) body plus response
    headers, HTTP version, method, URL, status code and reason.  Returns
    False on connection errors or for non-http(s) URL schemes.
    """
    # NOTE: duplicated havezstd/havebrotli names removed from the original
    # global statement.
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: the original updated the httpuseragent string
            # instead of the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become an HTTP Basic Authorization header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # BUG FIX: the original sent POST bodies with the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: http.client reports version as the int 10 or 11; the original
    # compared against the string "10" and therefore always chose "1.1".
    if(geturls_text.version == 10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 message objects are flattened into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffered body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the advertised Content-Encoding.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* with the httplib backend and spool the body to a
    uniquely named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize',
    'DownloadTime', plus the HTTP metadata of the underlying request), or
    False when the underlying download failed.  The caller is responsible
    for removing the temporary file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    # BUGFIX: this wrapper previously delegated to the urllib backend;
    # use the httplib backend it is named for.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lastmod, lastmod))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            # Missing or malformed Last-Modified header: keep current mtime.
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download *httpurl* with the httplib backend to *outpath*/*outfile*,
    or, when outfile is "-", return the content in memory.

    Returns a result dict ('Type': "File" or "Content") with size, timing
    and HTTP metadata, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # --- write to a real file on disk ---
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified timestamp over to the target.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(filepath, (lastmod, lastmod))
        except AttributeError:
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(filepath, (lastmod, lastmod))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict previously listed 'Method' twice; keep one entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
    # --- outfile == "-": copy the temp file back into memory ---
    pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if(not pretmpfilename):
        return False
    tmpfilename = pretmpfilename['Filename']
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with open(tmpfilename, 'rb') as ft:
        f = BytesIO()
        while True:
            databytes = ft.read(buffersize[1])
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                # Guard against ZeroDivisionError when size is unknown.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch *httpurl* using httplib2's HTTP(S)ConnectionWithTimeout and
    return a result dict ('Type': "Content", body, sizes, headers, status),
    or False on connection failure / unsupported scheme.

    Transparently decodes gzip/deflate (and brotli/zstd when available)
    response bodies.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no .update();
            # the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout)
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were being sent with the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version=="10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects may need to be flattened into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                # Guard against ZeroDivisionError for unknown Content-Length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the accumulated body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the response body per Content-Encoding.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib
        backend and return its result unchanged."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* with the httplib2 backend and spool the body to a
    uniquely named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize',
    'DownloadTime', plus HTTP metadata) or False on failure.  The caller
    is responsible for removing the temporary file.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lastmod, lastmod))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib
        file-download backend and return its result unchanged."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download *httpurl* with the httplib2 backend to *outpath*/*outfile*,
    or, when outfile is "-", return the content in memory.

    Returns a result dict ('Type': "File" or "Content") with size, timing
    and HTTP metadata, or False on failure.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # --- write to a real file on disk ---
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified timestamp over to the target.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(filepath, (lastmod, lastmod))
        except AttributeError:
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(filepath, (lastmod, lastmod))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict previously listed 'Method' twice; keep one entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
    # --- outfile == "-": copy the temp file back into memory ---
    pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if(not pretmpfilename):
        return False
    tmpfilename = pretmpfilename['Filename']
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with open(tmpfilename, 'rb') as ft:
        f = BytesIO()
        while True:
            databytes = ft.read(buffersize[1])
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                # Guard against ZeroDivisionError when size is unknown.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to the urllib
        to-file backend and return its result unchanged."""
        # BUGFIX: arguments were passed positionally in the wrong order
        # (buffersize in outfile's slot, ranges dropped); match the
        # delegate's (postdata, outfile, outpath, ranges, buffersize, ...)
        # parameter order.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Thin alias for the urllib backend, kept for API symmetry with the
    other download_from_url_with_* functions."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Thin alias for the urllib file-download backend, kept for API
    symmetry with the other download_from_url_file_with_* functions."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Thin alias for the urllib to-file backend, kept for API symmetry
    with the other download_from_url_to_file_with_* functions."""
    # BUGFIX: arguments were passed positionally in the wrong order
    # (buffersize in outfile's slot, ranges dropped); match the delegate's
    # (postdata, outfile, outpath, ranges, buffersize, ...) parameter order.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch *httpurl* using the requests library (streaming) and return a
    result dict ('Type': "Content", body, sizes, headers, status), or
    False on connection failure.

    Transparently decodes gzip/deflate (and brotli/zstd when available)
    response bodies.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no .update();
            # the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        reqsession = requests.Session()
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
        else:
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        # BUGFIX: requests.exceptions has no ConnectError; the correct
        # exception class is ConnectionError.
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    if(geturls_text.raw.version=="10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects may need to be flattened into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            # Read the raw (undecoded) stream in fixed-size chunks.
            databytes = geturls_text.raw.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                # Guard against ZeroDivisionError for unknown Content-Length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the accumulated body back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the response body per Content-Encoding.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when requests is unavailable: delegate to the urllib
        backend and return its result unchanged."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download *httpurl* with the requests backend and spool the body to a
    uniquely named temporary file.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize',
    'DownloadTime', plus HTTP metadata) or False on failure.  The caller
    is responsible for removing the temporary file.

    CONSISTENCY FIX: restored the default values for httpheaders,
    httpuseragent, httpreferer and httpcookie that every sibling
    download_from_url_file_with_* function declares (they had been lost,
    making those parameters required here only); backward-compatible.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except AttributeError:
            # Python 2 has no parsedate_to_datetime; fall back to strptime.
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lastmod, lastmod))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the requests package is unavailable: delegate to
        the urllib implementation with identical arguments and result shape."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Propagate the delegate's result dict; without this the wrapper
        # returns None and callers cannot see the download outcome.
        return returnval;
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the requests backend and either store it at
    outpath/outfile or, when outfile is "-", return the bytes in memory.

    Returns a result dict (Type, Filename/Content, sizes, Headers, timing,
    Code, Reason) or False when the download fails or the paths are unusable."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    if(not outfile == "-"):
        # Saving to a real file: normalize and validate the target path.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath + os.path.sep + outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified stamp on the stored file;
        # fall back to manual parsing on Pythons without parsedate_to_datetime.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: duration is end - start (the original subtraction was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; keep the effective
        # value (httpmethod, which last-wins in a dict literal).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile == "-"):
        # In-memory mode: download to a temp file, then copy into a buffer.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            with BytesIO() as f:
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize > 0):
                        # Guard the division: Content-Length may be unknown (0).
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: positive duration, as above.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the requests package is unavailable: delegate to
        the urllib implementation (note it takes buffersize/outfile/outpath in
        its own order and does not accept ranges)."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Propagate the delegate's result so callers receive the result dict.
        return returnval;
def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with aiohttp's ClientSession and return a result dict
    (Content, Contentsize, Headers, Version, Method, HeadersSent, URL, Code,
    Reason) or False when the connection fails.

    NOTE(review): this calls aiohttp's coroutine API synchronously (no
    await/event loop) -- confirm behaviour against the aiohttp client docs."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: the original called .update() on the user-agent string
            # itself; the header dict is what must be updated.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above -- update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        reqsession = aiohttp.ClientSession(cookie_jar=httpcookie, headers=httpheaders, timeout=timeout, read_timeout=timeout, conn_timeout=timeout, read_bufsize=buffersize);
        if(httpmethod=="GET"):
            geturls_text = reqsession.get(httpurl);
        elif(httpmethod=="POST"):
            geturls_text = reqsession.post(httpurl, data=postdata);
        else:
            geturls_text = reqsession.get(httpurl);
    except aiohttp.ServerTimeoutError:
        # BUGFIX: aiohttp exposes no "exceptions" submodule; the original
        # except clauses named nonexistent classes and would themselves raise.
        log.info("Error With URL "+httpurl);
        return False;
    except aiohttp.ClientConnectionError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    httpversionout = geturls_text.version;
    httpmethodout = geturls_text.method;
    httpurlout = geturls_text.url;
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request_info.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2 header objects are rebuilt key-by-key into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            # NOTE(review): aiohttp responses read via resp.content.read();
            # confirm this call shape against the aiohttp streaming API.
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # BUGFIX: rewind before read(), otherwise the buffer yields b"".
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode the declared Content-Encoding; best effort only.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
if(not haveaiohttp):
    def download_from_url_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is unavailable: delegate to the urllib
        implementation with identical arguments and result shape."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # Propagate the delegate's result so callers receive the result dict.
        return returnval;
def download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the aiohttp backend into a uniquely named
    temporary file and return a result dict describing it, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url/buffersize/start-time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    pretmpfilename = download_from_url_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Stamp the file with the server's Last-Modified time when parseable.
        # NOTE(review): the subsequent write likely resets mtime -- confirm.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    # BUGFIX: duration is end - start (the original logged a negative value).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not haveaiohttp):
    def download_from_url_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when aiohttp is unavailable: delegate to the urllib
        file-download implementation with identical arguments."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # Propagate the delegate's result so callers receive the result dict.
        return returnval;
def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the aiohttp backend and either store it at
    outpath/outfile or, when outfile is "-", return the bytes in memory.

    Returns a result dict (Type, Filename/Content, sizes, Headers, timing,
    Code, Reason) or False when the download fails or the paths are unusable."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    if(not outfile == "-"):
        # Saving to a real file: normalize and validate the target path.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath + os.path.sep + outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified stamp on the stored file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: duration is end - start (the original subtraction was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; keep the effective value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile == "-"):
        # In-memory mode: download to a temp file, then copy into a buffer.
        pretmpfilename = download_from_url_file_with_aiohttp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            with BytesIO() as f:
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize > 0):
                        # Guard the division: Content-Length may be unknown (0).
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: positive duration, as above.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
if(not haveaiohttp):
    def download_from_url_to_file_with_aiohttp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when aiohttp is unavailable: delegate to the urllib
        implementation (it takes buffersize/outfile/outpath in its own order
        and does not accept ranges)."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # Propagate the delegate's result so callers receive the result dict.
        return returnval;
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpx.Client and return a result dict
    (Content, Contentsize, Headers, Version, Method, HeadersSent, URL, Code,
    Reason) or False when the connection fails."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: the original called .update() on the user-agent string
            # itself; the header dict is what must be updated.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above -- update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    # Older httpx releases lack reason_phrase; fall back to a local mapping.
    try:
        httpcodereason = geturls_text.reason_phrase;
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.status_code);
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2 header objects are rebuilt key-by-key into a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            # NOTE(review): httpx Response.read() takes no size argument;
            # confirm this call shape against the httpx API docs.
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # BUGFIX: rewind before read(), otherwise the buffer yields b"".
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode the declared Content-Encoding; best effort only.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation with identical arguments and result shape."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # Propagate the delegate's result so callers receive the result dict.
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpx backend into a uniquely named
    temporary file and return a result dict describing it, or False on failure."""
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url/buffersize/start-time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Stamp the file with the server's Last-Modified time when parseable.
        # NOTE(review): the subsequent write likely resets mtime -- confirm.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    # BUGFIX: duration is end - start (the original logged a negative value).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    file-download implementation with identical arguments."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # Propagate the delegate's result so callers receive the result dict.
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx backend and either store it at
    outpath/outfile or, when outfile is "-", return the bytes in memory.

    Returns a result dict (Type, Filename/Content, sizes, Headers, timing,
    Code, Reason) or False when the download fails or the paths are unusable."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(timeout <= 0):
        timeout = 10;
    if(not outfile == "-"):
        # Saving to a real file: normalize and validate the target path.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath + os.path.sep + outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified stamp on the stored file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: duration is end - start (the original subtraction was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; keep the effective value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile == "-"):
        # In-memory mode: download to a temp file, then copy into a buffer.
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            with BytesIO() as f:
                while True:
                    databytes = ft.read(buffersize[1]);
                    if not databytes: break;
                    datasize = len(databytes);
                    fulldatasize = datasize + fulldatasize;
                    percentage = "";
                    if(downloadsize > 0):
                        # Guard the division: Content-Length may be unknown (0).
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                    prevdownsize = fulldatasize;
                    f.write(databytes);
                f.seek(0);
                fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: positive duration, as above.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation while keeping the httpx-based signature for callers.

    Returns whatever the urllib implementation returns (a result dict
    describing the written file, or False on failure).
    """
    # BUG FIX: the original passed `buffersize, outfile, outpath` positionally
    # into the urllib function's `outfile, outpath, ranges` parameter slots
    # (the shared to_file signature is `..., postdata, outfile, outpath,
    # ranges, buffersize, sleep, timeout`) and dropped `ranges` entirely.
    # Keyword arguments make the mapping explicit and correct.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpx Client (HTTP/1.1 + HTTP/2 enabled) and
    return a dict with the decoded body under 'Content' plus response
    metadata ('Headers', 'Version', 'Method', 'HeadersSent', 'URL', 'Code',
    'Reason').

    NOTE(review): this view of the file is missing several short source lines
    (`try:` / `else:` / `return` / `while`); the statements below are kept
    byte-for-byte, with comments marking where a dropped line is assumed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # presumably guarded by `if(sleep<0):` on a dropped line — confirm upstream
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalize headers: accept list-of-pairs or dict, canonicalize names.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # NOTE(review): on the (dropped) else branch this calls .update() on
        # the httpuseragent value itself — looks like it was meant to be
        # httpheaders.update(...); confirm against upstream.
        httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # NOTE(review): same suspicious receiver as above (httpuseragent).
        httpuseragent.update({'Referer': httpreferer});
    # Inline userinfo (user:pass@host) becomes a Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    # Non-dict post data is URL-encoded before sending.
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    # Issue the request (a dropped `try:` is assumed around this dispatch,
    # matching the except clauses below).
    if(httpmethod=="GET"):
        httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
        geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    elif(httpmethod=="POST"):
        httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
        geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
    # (dropped `else:`) any other verb falls back to a plain GET
    httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
    geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    # Collect response metadata from the httpx response object.
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason_phrase;
    # (dropped try/except assumed) fallback: derive reason from status code
    httpcodereason = http_status_to_reason(geturls_text.status_code);
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping key by key (loop line dropped).
        prehttpheaderout = httpheaderout;
        httpheaderkeys = httpheaderout.keys();
        imax = len(httpheaderkeys);
        httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
    except AttributeError:
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    # Expected size from Content-Length; 0 when the server did not send one.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    log.info("Downloading URL "+httpurl);
    # Stream the body into an in-memory buffer, logging progress per chunk.
    with BytesIO() as strbuf:
        # (dropped `while True:` loop header assumed here)
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # NOTE(review): reading after writes returns b"" unless a seek(0)
        # happens on a dropped line — confirm upstream.
        returnval_content = strbuf.read();
    # Transparently decode the advertised Content-Encoding
    # (try:/pass lines around each decompressor were dropped in this view).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        returnval_content = zlib.decompress(returnval_content);
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    except brotli.error:
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        returnval_content = zstandard.decompress(returnval_content);
    except zstandard.error:
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when httpx is unavailable (the guard line is not
    visible in this view): delegates to the urllib-based implementation with
    the same positional arguments, preserving this signature for callers."""
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via download_from_url_with_httpx2 and spill the body to
    a uniquely named temporary file; return a dict describing that file.

    NOTE(review): `ranges` is never referenced in the visible body — confirm
    whether it is intentionally unused here.
    NOTE(review): short source lines (try:/if-body/return) were dropped from
    this view; statements are kept byte-for-byte.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # presumably guarded by `if(sleep<0):` on a dropped line
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # (dropped body — presumably `return False;` when the fetch failed)
    if(not pretmpfilename):
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified time onto the temp file
        # (dropped `try:` assumed; AttributeError falls back to strptime
        # for Python versions without parsedate_to_datetime).
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
        os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # Refresh size fields from the file actually written, and record timing.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback defined when httpx is unavailable (guard line not visible in
    this view): delegates to the urllib file helper; arguments map
    one-to-one."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpx2 file helper and either move the result
    into outpath/outfile (outfile != "-") or stream the temp file back into
    memory and return the bytes (outfile == "-").

    NOTE(review): several short source lines (try:/else:/return/while) were
    dropped from this view; statements are kept byte-for-byte with comments
    marking where a dropped line is assumed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # presumably guarded by `if(sleep<0):` on a dropped line
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Writing to a real file: resolve the destination path first.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Destination sanity checks (dropped bodies presumably `return False;`).
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
        # Fetch into a temp file, then move it into place.
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified time on the moved file
        # (dropped `try:` assumed; AttributeError falls back to strptime).
        os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
        os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): the dict literal lists 'Method' twice — the second
        # entry ('Method': httpmethod) silently wins; confirm intent upstream.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    # (dropped `if(outfile=="-"):` branch header assumed)
    pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
    tmpfilename = pretmpfilename['Filename'];
    downloadsize = int(os.path.getsize(tmpfilename));
    exec_time_start = time.time();
    # Copy the temp file into memory chunk by chunk with progress logging.
    with open(tmpfilename, 'rb') as ft:
        # (dropped `while True:` loop header assumed)
        databytes = ft.read(buffersize[1]);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
    # NOTE(review): `f` is not the file handle `ft` above — presumably a
    # BytesIO created on a dropped line; confirm upstream.
    fdata = f.getvalue();
    os.remove(tmpfilename);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
    # NOTE(review): duplicate 'Method' key here as well (second one wins).
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation while keeping the httpx2 signature for callers.

    Returns whatever the urllib implementation returns (a result dict, or
    False on failure).
    """
    # BUG FIX: the original passed `buffersize, outfile, outpath` positionally
    # into the urllib function's `outfile, outpath, ranges` slots and dropped
    # `ranges` entirely; keyword arguments make the mapping explicit.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with an httpcore ConnectionPool (HTTP/1.1 only) and
    return a dict with the decoded body under 'Content' plus response
    metadata.

    NOTE(review): short source lines (try:/else:/return/while) were dropped
    from this view; statements below are byte-for-byte with comments marking
    where a dropped line is assumed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # presumably guarded by `if(sleep<0):` on a dropped line
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalize headers: accept list-of-pairs or dict, canonicalize names.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # NOTE(review): (dropped else) .update() called on the httpuseragent
        # value itself — likely meant httpheaders.update(...); confirm upstream.
        httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # NOTE(review): same suspicious receiver as above.
        httpuseragent.update({'Referer': httpreferer});
    # Inline user:pass in the URL becomes a Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    # Issue the request (dropped `try:` assumed).  Note that `timeout`,
    # `sleep` and `httpcookie` are not passed to httpcore in the visible code.
    if(httpmethod=="GET"):
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
        geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    elif(httpmethod=="POST"):
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
        # NOTE(review): POST branch sends method "GET" — looks like it should
        # be "POST"; confirm upstream.
        geturls_text = httpx_pool.request("GET", httpurl, data=postdata, headers=httpheaders);
    # (dropped `else:`) any other verb falls back to GET
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
    geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    # Response metadata; httpcore exposes only a numeric status, so the
    # reason phrase is derived and the version is assumed HTTP/1.1.
    httpcodeout = geturls_text.status;
    httpcodereason = http_status_to_reason(geturls_text.status);
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping key by key (loop line dropped).
        prehttpheaderout = httpheaderout;
        httpheaderkeys = httpheaderout.keys();
        imax = len(httpheaderkeys);
        httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
    except AttributeError:
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    log.info("Downloading URL "+httpurl);
    # Stream the body into memory with per-chunk progress logging.
    with BytesIO() as strbuf:
        # (dropped `while True:` loop header assumed)
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # NOTE(review): a seek(0) presumably precedes this read on a dropped line.
        returnval_content = strbuf.read();
    # Decode the advertised Content-Encoding (try:/pass lines dropped).
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        returnval_content = zlib.decompress(returnval_content);
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    except brotli.error:
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        returnval_content = zstandard.decompress(returnval_content);
    except zstandard.error:
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
if(not havehttpcore):
    # httpcore missing: provide a urllib-backed stand-in with the same
    # name and signature so callers are unaffected.
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Delegate to download_from_url_with_urllib; arguments map one-to-one."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl via download_from_url_with_httpcore and spill the body
    to a uniquely named temporary file; return a dict describing that file.

    NOTE(review): `ranges` is never referenced in the visible body — confirm
    whether it is intentionally unused here.
    NOTE(review): short source lines (try:/if-body/return) were dropped from
    this view; statements are kept byte-for-byte.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # presumably guarded by `if(sleep<0):` on a dropped line
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    # (dropped body — presumably `return False;` when the fetch failed)
    if(not pretmpfilename):
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Mirror the server's Last-Modified time onto the temp file
        # (dropped `try:` assumed; AttributeError falls back to strptime).
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
        os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # Refresh size fields from the file actually written, and record timing.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
if(not havehttpcore):
    # httpcore missing: urllib-backed stand-in keeps the same name/signature.
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Delegate to download_from_url_file_with_urllib; arguments map one-to-one."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httpcore file helper and either move the
    result into outpath/outfile (outfile != "-") or stream the temp file back
    into memory and return the bytes (outfile == "-").

    NOTE(review): several short source lines (try:/else:/return/while) were
    dropped from this view; statements are kept byte-for-byte with comments
    marking where a dropped line is assumed.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    # presumably guarded by `if(sleep<0):` on a dropped line
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Writing to a real file: resolve the destination path first.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Destination sanity checks (dropped bodies presumably `return False;`).
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
        # Fetch into a temp file, then move it into place.
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified time on the moved file
        # (dropped `try:` assumed; AttributeError falls back to strptime).
        os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
        os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): the dict literal lists 'Method' twice — the second
        # entry ('Method': httpmethod) silently wins; confirm intent upstream.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    # (dropped `if(outfile=="-"):` branch header assumed)
    pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
    tmpfilename = pretmpfilename['Filename'];
    downloadsize = int(os.path.getsize(tmpfilename));
    exec_time_start = time.time();
    # Copy the temp file into memory chunk by chunk with progress logging.
    with open(tmpfilename, 'rb') as ft:
        # (dropped `while True:` loop header assumed)
        databytes = ft.read(buffersize[1]);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
    # NOTE(review): `f` is not the file handle `ft` above — presumably a
    # BytesIO created on a dropped line; confirm upstream.
    fdata = f.getvalue();
    os.remove(tmpfilename);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
    # NOTE(review): duplicate 'Method' key here as well (second one wins).
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to the urllib
        implementation while keeping the httpcore signature for callers.

        Returns whatever the urllib implementation returns (a result dict,
        or False on failure).
        """
        # BUG FIX: the original passed `buffersize, outfile, outpath`
        # positionally into the urllib function's `outfile, outpath, ranges`
        # slots and dropped `ranges` entirely; keyword arguments make the
        # mapping explicit.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the httpcore backend (HTTP/1.1 + HTTP/2).

    Returns a dict describing the response ('Type': "Content", plus the body,
    headers, status code/reason and request metadata), or False on a
    connection error.  httpcookie/timeout are accepted for interface
    compatibility with the sibling backends.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...) — calling .update() on a
            # string; the header dict is what must be updated.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above — was httpuseragent.update(...).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUG FIX: the POST branch issued request("GET", ...).
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    # httpcore does not expose the negotiated protocol version here;
    # report HTTP/1.1 as the sibling backends do.
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # On Python 2 rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUG FIX: rewind before read(), otherwise the buffer yields b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the response body if the server compressed it.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content,
                 'Contentsize': fulldatasize,
                 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"),
                                    'SI': get_readable_size(fulldatasize, 2, "SI")},
                 'Headers': httpheaderout, 'Version': httpversionout,
                 'Method': httpmethodout, 'HeadersSent': httpheadersentout,
                 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the httpcore backend into a named temporary file.

    Returns a dict ('Type': "File") with the temp file name, size, headers
    and timing info, or False when the underlying download fails.  The
    ranges parameter is accepted for interface compatibility only.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best effort: stamp the temp file with the server's Last-Modified.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename,
                     'Filesize': pretmpfilename.get('Contentsize'),
                     'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"),
                                     'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")},
                     'Headers': pretmpfilename.get('Headers'),
                     'Version': pretmpfilename.get('Version'),
                     'Method': pretmpfilename.get('Method'),
                     'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'),
                     'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename),
                      'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"),
                                      'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")},
                      'DownloadTime': float(exec_time_end - exec_time_start),
                      'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the httpcore backend.

    When outfile is a name, save into outpath/outfile and return a "File"
    result dict; when outfile is "-", return the bytes in-memory as a
    "Content" result dict.  buffersize is [download, copy].  Returns False
    on failure or when the target path is unusable.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Persist to disk: normalize the destination and sanity-check it.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: keep the server's Last-Modified on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: original dict literal had 'Method' twice; only the
        # effective value (httpmethod, which won) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                     'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_end - exec_time_start),
                     'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code'],
                     'Reason': pretmpfilename['Reason']}
    if(outfile == "-"):
        # Stream to memory: download to a temp file, then copy into a buffer.
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                        'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_end - exec_time_start),
                     'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code'],
                     'Reason': pretmpfilename['Reason']}
    return returnval
if(not havehttpcore):
    # BUG FIX: without this guard the fallback definition would
    # unconditionally shadow the real httpcore implementation above.
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when httpcore is unavailable: delegate to urllib."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: route "request3" downloads through urllib3.

    BUG FIX: the delegate's result was assigned but never returned.
    """
    returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend: route "request3" file downloads through urllib3.

    BUG FIX: the delegate's result was assigned but never returned.
    """
    returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend: route "request3" to-file downloads through urllib3.

    BUG FIX: the delegate's result was assigned but never returned.
    """
    returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the urllib3 backend.

    Returns a dict describing the response ('Type': "Content", plus the body,
    headers, status code/reason and request metadata), or False on a
    connection/URL error.  httpcookie is accepted for interface
    compatibility.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...) — .update() on a string;
            # the header dict is what must be updated.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above — was httpuseragent.update(...).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic auth header.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # Reuse the numeric timeout for both the connect and the read phases.
    timeout = urllib3.util.Timeout(connect=timeout, read=timeout)
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUG FIX: urllib3 reports the protocol version as an int (10/11), so
    # the original string comparison =="10" never matched.
    if(geturls_text.version == 10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # On Python 2 rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # BUG FIX: rewind before read(), otherwise the buffer yields b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the response body if the server compressed it.
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    elif(httpheaderout.get("Content-Encoding") == "zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content)
        except zstandard.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content,
                 'Contentsize': fulldatasize,
                 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"),
                                    'SI': get_readable_size(fulldatasize, 2, "SI")},
                 'Headers': httpheaderout, 'Version': httpversionout,
                 'Method': httpmethodout, 'HeadersSent': httpheadersentout,
                 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the urllib3 backend into a named temporary file.

    Returns a dict ('Type': "File") with the temp file name, size, headers
    and timing info, or False when the underlying download fails.  The
    ranges parameter is accepted for interface compatibility only.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best effort: stamp the temp file with the server's Last-Modified.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename,
                     'Filesize': pretmpfilename.get('Contentsize'),
                     'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"),
                                     'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")},
                     'Headers': pretmpfilename.get('Headers'),
                     'Version': pretmpfilename.get('Version'),
                     'Method': pretmpfilename.get('Method'),
                     'HeadersSent': pretmpfilename.get('HeadersSent'),
                     'URL': pretmpfilename.get('URL'),
                     'Code': pretmpfilename.get('Code'),
                     'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename),
                      'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"),
                                      'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")},
                      'DownloadTime': float(exec_time_end - exec_time_start),
                      'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the urllib3 backend.

    When outfile is a name, save into outpath/outfile and return a "File"
    result dict; when outfile is "-", return the bytes in-memory as a
    "Content" result dict.  buffersize is [download, copy].  Returns False
    on failure or when the target path is unusable.
    """
    global geturls_download_sleep, havezstd, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(timeout <= 0):
        timeout = 10
    if(not outfile == "-"):
        # Persist to disk: normalize the destination and sanity-check it.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: keep the server's Last-Modified on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: original dict literal had 'Method' twice; only the
        # effective value (httpmethod, which won) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                     'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_end - exec_time_start),
                     'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code'],
                     'Reason': pretmpfilename['Reason']}
    if(outfile == "-"):
        # Stream to memory: download to a temp file, then copy into a buffer.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                        'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_end - exec_time_start),
                     'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code'],
                     'Reason': pretmpfilename['Reason']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
3403 def download_from_url_with_mechanize(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3404 global geturls_download_sleep
, havezstd
, havebrotli
;
3406 sleep
= geturls_download_sleep
;
3409 urlparts
= urlparse
.urlparse(httpurl
);
3410 if(isinstance(httpheaders
, list)):
3411 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
3412 httpheaders
= fix_header_names(httpheaders
);
3413 if(httpuseragent
is not None):
3414 if('User-Agent' in httpheaders
):
3415 httpheaders
['User-Agent'] = httpuseragent
;
3417 httpuseragent
.update({'User-Agent': httpuseragent
});
3418 if(httpreferer
is not None):
3419 if('Referer' in httpheaders
):
3420 httpheaders
['Referer'] = httpreferer
;
3422 httpuseragent
.update({'Referer': httpreferer
});
3423 if(urlparts
.username
is not None or urlparts
.password
is not None):
3424 if(sys
.version
[0]=="2"):
3425 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
3426 if(sys
.version
[0]>="3"):
3427 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
3428 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
3429 geturls_opener
= mechanize
.Browser();
3430 if(isinstance(httpheaders
, dict)):
3431 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
3433 geturls_opener
.addheaders
= httpheaders
;
3434 geturls_opener
.set_cookiejar(httpcookie
);
3435 geturls_opener
.set_handle_robots(False);
3436 if(postdata
is not None and not isinstance(postdata
, dict)):
3437 postdata
= urlencode(postdata
);
3439 if(httpmethod
=="GET"):
3440 geturls_text
= geturls_opener
.open(httpurl
);
3441 elif(httpmethod
=="POST"):
3442 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
3444 geturls_text
= geturls_opener
.open(httpurl
);
3445 except mechanize
.HTTPError
as geturls_text_error
:
3446 geturls_text
= geturls_text_error
;
3447 log
.info("Error With URL "+httpurl
);
3449 log
.info("Error With URL "+httpurl
);
3451 except socket
.timeout
:
3452 log
.info("Error With URL "+httpurl
);
3454 httpcodeout
= geturls_text
.code
;
3455 httpcodereason
= geturls_text
.msg
;
3456 httpversionout
= "1.1";
3457 httpmethodout
= httpmethod
;
3458 httpurlout
= geturls_text
.geturl();
3459 httpheaderout
= geturls_text
.info();
3460 reqhead
= geturls_opener
.request
;
3461 httpheadersentout
= reqhead
.header_items();
3462 if(isinstance(httpheaderout
, list)):
3463 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
3464 if(sys
.version
[0]=="2"):
3466 prehttpheaderout
= httpheaderout
;
3467 httpheaderkeys
= httpheaderout
.keys();
3468 imax
= len(httpheaderkeys
);
3472 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
3474 except AttributeError:
3476 httpheaderout
= fix_header_names(httpheaderout
);
3477 if(isinstance(httpheadersentout
, list)):
3478 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
3479 httpheadersentout
= fix_header_names(httpheadersentout
);
3480 log
.info("Downloading URL "+httpurl
);
3481 downloadsize
= httpheaderout
.get('Content-Length');
3482 if(downloadsize
is not None):
3483 downloadsize
= int(downloadsize
);
3484 if downloadsize
is None: downloadsize
= 0;
3487 log
.info("Downloading URL "+httpurl
);
3488 with
BytesIO() as strbuf
:
3490 databytes
= geturls_text
.read(buffersize
);
3491 if not databytes
: break;
3492 datasize
= len(databytes
);
3493 fulldatasize
= datasize
+ fulldatasize
;
3496 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3497 downloaddiff
= fulldatasize
- prevdownsize
;
3498 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3499 prevdownsize
= fulldatasize
;
3500 strbuf
.write(databytes
);
3502 returnval_content
= strbuf
.read();
3503 if(httpheaderout
.get("Content-Encoding")=="gzip"):
3505 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
3508 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
3510 returnval_content
= zlib
.decompress(returnval_content
);
3513 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
3515 returnval_content
= brotli
.decompress(returnval_content
);
3516 except brotli
.error
:
3518 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
3520 returnval_content
= zstandard
.decompress(returnval_content
);
3521 except zstandard
.error
:
3523 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
};
3524 geturls_text
.close();
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the mechanize package is unavailable.

        Keeps the same signature as the real mechanize downloader and simply
        delegates to the urllib implementation, so callers never need to know
        which backend is installed.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the mechanize backend and spool the body into a
    uniquely named temporary file.

    Returns the usual result dict with 'Type': "File" and the temp file's
    name/size, or False when the underlying download failed.
    The ranges parameter is accepted for signature parity with the other
    backends; it is not used by this implementation.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url+buffersize+start-time so concurrent downloads get unique temp names.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file;
            # parsedate_to_datetime raises AttributeError on a missing header (None).
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, a negative duration).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when the mechanize package is unavailable.

        Delegates straight to the urllib file downloader with the same
        argument order, preserving the mechanize signature for callers.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the mechanize backend and either move the result
    to outpath/outfile, or (when outfile=="-") return its bytes in memory.

    buffersize is a pair: [download chunk size, local copy chunk size].
    Returns a result dict, or False on failure / invalid output location.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Mirror the server's Last-Modified timestamp onto the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, a negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; only one entry kept
        # (httpmethod, which the duplicate made win anyway).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                # Guard the percentage math so a zero-length file cannot divide by zero.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: 'HeadersSent' previously held the literal list ['HeadersSent']
        # instead of the headers actually sent; the duplicate 'Method' key is
        # also collapsed to a single entry.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the mechanize package is unavailable.

        BUGFIX: the original passed the tail arguments positionally as
        (postdata, buffersize, outfile, outpath, sleep, timeout), which puts
        buffersize where outfile belongs and drops ranges entirely — the
        to-file signatures in this file take outfile/outpath/ranges before
        buffersize. Passing them by keyword lands each value on the right
        parameter regardless of position.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with pycurl and return a dict with the response
    content plus metadata (Headers, Version, Method, URL, Code, Reason),
    or False on a socket/parse error."""
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — calling .update() on the
            # user-agent string raises AttributeError; the header dict is the
            # thing to update.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same as above — update httpheaders, not httpuseragent.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    # NOTE(review): this opener is built but pycurl performs the transfer; it is
    # kept only to match the sibling implementations' cookie handling setup.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    retrieved_body = BytesIO();
    retrieved_headers = BytesIO();
    try:
        if(httpmethod=="GET"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        elif(httpmethod=="POST"):
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.setopt(geturls_text.POST, True);
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
            geturls_text.perform();
        else:
            # Any other method falls back to a plain GET-style transfer.
            geturls_text = pycurl.Curl();
            geturls_text.setopt(geturls_text.URL, httpurl);
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
            geturls_text.setopt(geturls_text.TIMEOUT, timeout);
            geturls_text.perform();
        retrieved_headers.seek(0);
        if(sys.version[0]=="2"):
            pycurlhead = retrieved_headers.read();
        if(sys.version[0]>="3"):
            pycurlhead = retrieved_headers.read().decode('UTF-8');
        # First status line gives "HTTP/<version> <code> <reason?>".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0];
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
        retrieved_body.seek(0);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    except ValueError:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
    httpversionout = pyhttpverinfo[0];
    httpmethodout = httpmethod;
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
    httpheaderout = pycurlheadersout;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping key-by-key to normalize its type.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            # Progress reporting only when Content-Length is known and non-zero.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently undo any Content-Encoding; decode errors are best-effort
    # and leave the raw bytes in place.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
        try:
            returnval_content = zstandard.decompress(returnval_content);
        except zstandard.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
# NOTE(review): the guard line was lost in extraction; it must exist (the real
# pycurl implementation is defined above and would otherwise be clobbered),
# mirroring the `if(not havemechanize):` fallbacks earlier in the file.
if(not havepycurl):
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the pycurl backend and spool the body into a
    uniquely named temporary file.

    Returns the usual result dict with 'Type': "File", or False when the
    underlying download failed. ranges is accepted for signature parity
    with the other backends; it is not used here.
    """
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url+buffersize+start-time so concurrent downloads get unique temp names.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, a negative duration).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
# NOTE(review): guard line lost in extraction; restored to mirror the
# `if(not havemechanize):` fallbacks and avoid clobbering the real implementation.
if(not havepycurl):
    def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to the urllib
        file downloader with the identical argument order."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the pycurl backend and either move the result to
    outpath/outfile, or (when outfile=="-") return its bytes in memory.

    buffersize is a pair: [download chunk size, local copy chunk size].
    Returns a result dict, or False on failure / invalid output location.
    """
    global geturls_download_sleep, havezstd, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Mirror the server's Last-Modified timestamp onto the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, a negative duration).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; only one entry kept
        # (httpmethod, which the duplicate made win anyway).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                # Guard the percentage math so a zero-length file cannot divide by zero.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key collapsed to a single entry.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
# NOTE(review): guard line lost in extraction; restored to mirror the
# `if(not havemechanize):` fallbacks and avoid clobbering the real implementation.
if(not havepycurl):
    def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable.

        BUGFIX: the original passed the tail arguments positionally as
        (postdata, buffersize, outfile, outpath, sleep, timeout), misplacing
        buffersize and dropping ranges — the to-file signatures in this file
        take outfile/outpath/ranges before buffersize. Keyword arguments land
        each value on the right parameter regardless of position.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout);
3932 if(havepycurl
and hasattr(pycurl
, "CURL_HTTP_VERSION_2_0")):
3933 def download_from_url_with_pycurl2(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
3934 global geturls_download_sleep
, havezstd
, havebrotli
;
3936 sleep
= geturls_download_sleep
;
3939 urlparts
= urlparse
.urlparse(httpurl
);
3940 if(isinstance(httpheaders
, list)):
3941 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
3942 httpheaders
= fix_header_names(httpheaders
);
3943 if(httpuseragent
is not None):
3944 if('User-Agent' in httpheaders
):
3945 httpheaders
['User-Agent'] = httpuseragent
;
3947 httpuseragent
.update({'User-Agent': httpuseragent
});
3948 if(httpreferer
is not None):
3949 if('Referer' in httpheaders
):
3950 httpheaders
['Referer'] = httpreferer
;
3952 httpuseragent
.update({'Referer': httpreferer
});
3953 if(urlparts
.username
is not None or urlparts
.password
is not None):
3954 if(sys
.version
[0]=="2"):
3955 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
3956 if(sys
.version
[0]>="3"):
3957 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
3958 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
3959 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
3960 if(isinstance(httpheaders
, dict)):
3961 httpheaders
= make_http_headers_from_dict_to_pycurl(httpheaders
);
3962 geturls_opener
.addheaders
= httpheaders
;
3964 if(postdata
is not None and not isinstance(postdata
, dict)):
3965 postdata
= urlencode(postdata
);
3966 retrieved_body
= BytesIO();
3967 retrieved_headers
= BytesIO();
3969 if(httpmethod
=="GET"):
3970 geturls_text
= pycurl
.Curl();
3971 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
3972 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_2_0
);
3973 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
3974 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
3975 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
3976 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
3977 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
3978 geturls_text
.perform();
3979 elif(httpmethod
=="POST"):
3980 geturls_text
= pycurl
.Curl();
3981 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
3982 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_2_0
);
3983 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
3984 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
3985 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
3986 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
3987 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
3988 geturls_text
.setopt(geturls_text
.POST
, True);
3989 geturls_text
.setopt(geturls_text
.POSTFIELDS
, postdata
);
3990 geturls_text
.perform();
3992 geturls_text
= pycurl
.Curl();
3993 geturls_text
.setopt(geturls_text
.URL
, httpurl
);
3994 geturls_text
.setopt(geturls_text
.HTTP_VERSION
, geturls_text
.CURL_HTTP_VERSION_2_0
);
3995 geturls_text
.setopt(geturls_text
.WRITEFUNCTION
, retrieved_body
.write
);
3996 geturls_text
.setopt(geturls_text
.HTTPHEADER
, httpheaders
);
3997 geturls_text
.setopt(geturls_text
.HEADERFUNCTION
, retrieved_headers
.write
);
3998 geturls_text
.setopt(geturls_text
.FOLLOWLOCATION
, True);
3999 geturls_text
.setopt(geturls_text
.TIMEOUT
, timeout
);
4000 geturls_text
.perform();
4001 retrieved_headers
.seek(0);
4002 if(sys
.version
[0]=="2"):
4003 pycurlhead
= retrieved_headers
.read();
4004 if(sys
.version
[0]>="3"):
4005 pycurlhead
= retrieved_headers
.read().decode('UTF-8');
4006 pyhttpverinfo
= re
.findall(r
'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead
.splitlines()[0].strip())[0];
4007 pycurlheadersout
= make_http_headers_from_pycurl_to_dict(pycurlhead
);
4008 retrieved_body
.seek(0);
4009 except socket
.timeout
:
4010 log
.info("Error With URL "+httpurl
);
4012 except socket
.gaierror
:
4013 log
.info("Error With URL "+httpurl
);
4016 log
.info("Error With URL "+httpurl
);
4018 httpcodeout
= geturls_text
.getinfo(geturls_text
.HTTP_CODE
);
4019 httpcodereason
= http_status_to_reason(geturls_text
.getinfo(geturls_text
.HTTP_CODE
));
4020 httpversionout
= pyhttpverinfo
[0];
4021 httpmethodout
= httpmethod
;
4022 httpurlout
= geturls_text
.getinfo(geturls_text
.EFFECTIVE_URL
);
4023 httpheaderout
= pycurlheadersout
;
4024 httpheadersentout
= httpheaders
;
4025 if(isinstance(httpheaderout
, list)):
4026 httpheaderout
= dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout
)));
4027 if(sys
.version
[0]=="2"):
4029 prehttpheaderout
= httpheaderout
;
4030 httpheaderkeys
= httpheaderout
.keys();
4031 imax
= len(httpheaderkeys
);
4035 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
4037 except AttributeError:
4039 httpheaderout
= fix_header_names(httpheaderout
);
4040 if(isinstance(httpheadersentout
, list)):
4041 httpheadersentout
= dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout
)));
4042 httpheadersentout
= fix_header_names(httpheadersentout
);
4043 log
.info("Downloading URL "+httpurl
);
4044 downloadsize
= httpheaderout
.get('Content-Length');
4045 if(downloadsize
is not None):
4046 downloadsize
= int(downloadsize
);
4047 if downloadsize
is None: downloadsize
= 0;
4050 log
.info("Downloading URL "+httpurl
);
4051 with
BytesIO() as strbuf
:
4053 databytes
= retrieved_body
.read(buffersize
);
4054 if not databytes
: break;
4055 datasize
= len(databytes
);
4056 fulldatasize
= datasize
+ fulldatasize
;
4059 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4060 downloaddiff
= fulldatasize
- prevdownsize
;
4061 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4062 prevdownsize
= fulldatasize
;
4063 strbuf
.write(databytes
);
4065 returnval_content
= strbuf
.read();
4066 if(httpheaderout
.get("Content-Encoding")=="gzip"):
4068 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
4071 elif(httpheaderout
.get("Content-Encoding")=="deflate"):
4073 returnval_content
= zlib
.decompress(returnval_content
);
4076 elif(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
4078 returnval_content
= brotli
.decompress(returnval_content
);
4079 except brotli
.error
:
4081 elif(httpheaderout
.get("Content-Encoding")=="zstd" and havezstd
):
4083 returnval_content
= zstandard
.decompress(returnval_content
);
4084 except zstandard
.error
:
4086 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
};
4087 geturls_text
.close();
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback HTTP/2 entry point: delegate to the urllib implementation.

    NOTE(review): this def is expected to sit under a not-havepycurl guard in
    the full file -- confirm indentation/guard against upstream.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # BUG FIX: the delegated result was computed but never returned, so every
    # caller received None instead of the download metadata dict.
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/2 entry point for pycurl builds without HTTP/2 support:
        fall back to the plain pycurl (HTTP/1.x) implementation."""
        # CONSISTENCY FIX: this fallback previously delegated to urllib even
        # though pycurl itself is available; the sibling fallback
        # download_from_url_file_with_pycurl2 (same guard) delegates to the
        # pycurl implementation, so do the same here.
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl over HTTP/2 via pycurl into a named temporary file.

        Returns the metadata dict of download_from_url_with_pycurl2 rewritten
        with 'Type': "File" and 'Filename' pointing at the temp file, or False
        when the underlying download failed.

        NOTE(review): try/except scaffolding and returns were reconstructed
        from the sibling pycurl/pycurl3 variants -- verify against upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            try:
                # Carry the server's Last-Modified timestamp onto the temp file.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    # Python 2 has no parsedate_to_datetime: parse manually.
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
            f.write(pretmpfilename['Content'])
        exec_time_end = time.time()
        # BUG FIX: the duration was computed as start - end (always negative);
        # use end - start so logs and 'DownloadTime' report a real elapsed time.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback file-download entry point: delegate to urllib.

    NOTE(review): expected to sit under a not-havepycurl guard in the full
    file -- confirm indentation/guard against upstream.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # BUG FIX: the result was computed but never returned.
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/2 file-download entry point when pycurl lacks HTTP/2 support:
        fall back to the plain pycurl implementation."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl over HTTP/2 via pycurl and deliver the result.

        With a real outfile name the temp download is moved to outpath/outfile
        and file metadata is returned; with outfile=="-" the content is read
        back and returned in-memory ('Type': "Content").  Returns False on
        failure.

        NOTE(review): dropped scaffolding (guards, returns, copy-loop setup)
        reconstructed from the sibling variants -- verify against upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False  # outpath exists but is a file, not a directory
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False  # destination name is taken by a directory
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename['Filename']
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            try:
                # Carry the server's Last-Modified timestamp to the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            # BUG FIX: duration was start - end (negative); use end - start.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # BUG FIX: the dict literal carried a duplicate 'Method' key; the
            # 'Method': pretmpfilename['Method'] entry was dead (silently
            # overwritten by 'Method': httpmethod).  Keep the effective entry.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename['Filename']
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            # Stream the temp file back into memory chunk by chunk, logging
            # copy progress, then delete the temp file.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes: break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            # BUG FIX: duration was start - end (negative); use end - start.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            # BUG FIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback to-file entry point: delegate to the urllib implementation.

    NOTE(review): expected to sit under a not-havepycurl guard in the full
    file; assumes the urllib variant shares this family's parameter names --
    confirm against its definition.
    """
    # BUG FIX: arguments were previously passed positionally in the wrong
    # order (buffersize before outfile/outpath, ranges dropped entirely);
    # pass by keyword so every value binds to the parameter of the same name.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders=httpheaders, httpuseragent=httpuseragent, httpreferer=httpreferer, httpcookie=httpcookie, httpmethod=httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    # BUG FIX: the result was computed but never returned.
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """HTTP/2 to-file entry point when pycurl lacks HTTP/2 support:
        fall back to the plain pycurl implementation."""
        # BUG FIX: arguments were passed positionally in the wrong order
        # (buffersize before outfile/outpath, ranges dropped); pass by keyword.
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders=httpheaders, httpuseragent=httpuseragent, httpreferer=httpreferer, httpcookie=httpcookie, httpmethod=httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fetch httpurl over HTTP/3 via pycurl and return a metadata dict.

        The dict carries the decompressed body ('Content'), its size, the
        response headers, HTTP version, method, sent headers, effective URL,
        status code and reason.  Returns False on socket errors.

        NOTE(review): try/except scaffolding and returns were reconstructed
        from the sibling pycurl/pycurl2 variants -- verify against upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        urlparts = urlparse.urlparse(httpurl)
        if(isinstance(httpheaders, list)):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        httpheaders = fix_header_names(httpheaders)
        if(httpuseragent is not None):
            if('User-Agent' in httpheaders):
                httpheaders['User-Agent'] = httpuseragent
            else:
                # BUG FIX: previously called httpuseragent.update(...) -- a
                # string has no update(); the header dict is the right target.
                httpheaders.update({'User-Agent': httpuseragent})
        if(httpreferer is not None):
            if('Referer' in httpheaders):
                httpheaders['Referer'] = httpreferer
            else:
                # BUG FIX: same as above -- update httpheaders, not the string.
                httpheaders.update({'Referer': httpreferer})
        if(urlparts.username is not None or urlparts.password is not None):
            # Inline credentials in the URL become a Basic Authorization header.
            if(sys.version[0]=="2"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
            if(sys.version[0]>="3"):
                inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
            httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
        geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
        geturls_opener.addheaders = httpheaders
        time.sleep(sleep)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        retrieved_body = BytesIO()
        retrieved_headers = BytesIO()
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.perform()
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.setopt(geturls_text.POST, True)
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
                geturls_text.perform()
            else:
                # Any other method is treated like GET.
                geturls_text = pycurl.Curl()
                geturls_text.setopt(geturls_text.URL, httpurl)
                geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
                geturls_text.setopt(geturls_text.TIMEOUT, timeout)
                geturls_text.perform()
            retrieved_headers.seek(0)
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read()
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8')
            # Parse "HTTP/x y reason" from the first status line.
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+)(?: ([A-Za-z\s]+))?$', pycurlhead.splitlines()[0].strip().rstrip('\r\n'))[0]
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
            retrieved_body.seek(0)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        except ValueError:
            # NOTE(review): third handler reconstructed -- confirm exception
            # type against the full file.
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
        httpversionout = pyhttpverinfo[0]
        httpmethodout = httpmethod
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
        httpheaderout = pycurlheadersout
        httpheadersentout = httpheaders
        if(isinstance(httpheaderout, list)):
            httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
        if(sys.version[0]=="2"):
            # Python 2: rebuild the header mapping as a plain dict.
            try:
                prehttpheaderout = httpheaderout
                httpheaderkeys = httpheaderout.keys()
                imax = len(httpheaderkeys)
                ic = 0
                httpheaderout = {}
                while(ic < imax):
                    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                    ic += 1
            except AttributeError:
                pass
        httpheaderout = fix_header_names(httpheaderout)
        if(isinstance(httpheadersentout, list)):
            httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
        httpheadersentout = fix_header_names(httpheadersentout)
        log.info("Downloading URL "+httpurl)
        downloadsize = httpheaderout.get('Content-Length')
        if(downloadsize is not None):
            downloadsize = int(downloadsize)
        if downloadsize is None: downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with BytesIO() as strbuf:
            while True:
                databytes = retrieved_body.read(buffersize)
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                strbuf.write(databytes)
            strbuf.seek(0)
            returnval_content = strbuf.read()
        # Transparently decode the advertised Content-Encoding, best-effort.
        if(httpheaderout.get("Content-Encoding")=="gzip"):
            try:
                returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="deflate"):
            try:
                returnval_content = zlib.decompress(returnval_content)
            except zlib.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            try:
                returnval_content = brotli.decompress(returnval_content)
            except brotli.error:
                pass
        elif(httpheaderout.get("Content-Encoding")=="zstd" and havezstd):
            try:
                returnval_content = zstandard.decompress(returnval_content)
            except zstandard.error:
                pass
        returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        geturls_text.close()
        return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback HTTP/3 entry point: delegate to the urllib implementation.

    NOTE(review): expected to sit under a not-havepycurl guard in the full
    file -- confirm indentation/guard against upstream.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    # BUG FIX: the result was computed but never returned.
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 entry point when only HTTP/2 is available: use pycurl2."""
        returnval = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 entry point when neither HTTP/3 nor HTTP/2 is available:
        use the plain pycurl (HTTP/1.x) implementation."""
        returnval = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Download httpurl over HTTP/3 via pycurl into a named temporary file.

        Returns the metadata dict of download_from_url_with_pycurl3 rewritten
        with 'Type': "File" and 'Filename' pointing at the temp file, or False
        when the underlying download failed.

        NOTE(review): try/except scaffolding and returns were reconstructed
        from the sibling pycurl/pycurl2 variants -- verify against upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Unique temp-file suffix derived from URL, buffer size and start time.
        myhash = hashlib.new("sha1")
        if(sys.version[0]=="2"):
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        if(sys.version[0]>="3"):
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        if(not pretmpfilename):
            return False
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            try:
                # Carry the server's Last-Modified timestamp onto the temp file.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    # Python 2 has no parsedate_to_datetime: parse manually.
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
            f.write(pretmpfilename['Content'])
        exec_time_end = time.time()
        # BUG FIX: the duration was computed as start - end (always negative);
        # use end - start so logs and 'DownloadTime' report a real elapsed time.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback file-download entry point: delegate to urllib.

    NOTE(review): expected to sit under a not-havepycurl guard in the full
    file -- confirm indentation/guard against upstream.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # BUG FIX: the result was computed but never returned.
    return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 file-download entry point when only HTTP/2 is available:
        fall back to the pycurl2 implementation."""
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """HTTP/3 file-download entry point when neither HTTP/3 nor HTTP/2 is
        available: fall back to the plain pycurl implementation."""
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the result was computed but never returned.
        return returnval
if(havepycurl and hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Download httpurl over HTTP/3 via pycurl and deliver the result.

        With a real outfile name the temp download is moved to outpath/outfile
        and file metadata is returned; with outfile=="-" the content is read
        back and returned in-memory ('Type': "Content").  Returns False on
        failure.

        NOTE(review): dropped scaffolding (guards, returns, copy-loop setup)
        reconstructed from the sibling variants -- verify against upstream.
        """
        global geturls_download_sleep, havezstd, havebrotli
        if(sleep<0):
            sleep = geturls_download_sleep
        if(timeout<=0):
            timeout = 10
        if(not outfile=="-"):
            outpath = outpath.rstrip(os.path.sep)
            filepath = os.path.realpath(outpath+os.path.sep+outfile)
            if(not os.path.exists(outpath)):
                os.makedirs(outpath)
            if(os.path.exists(outpath) and os.path.isfile(outpath)):
                return False  # outpath exists but is a file, not a directory
            if(os.path.exists(filepath) and os.path.isdir(filepath)):
                return False  # destination name is taken by a directory
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename['Filename']
            downloadsize = int(os.path.getsize(tmpfilename))
            log.info("Moving file "+tmpfilename+" to "+filepath)
            exec_time_start = time.time()
            shutil.move(tmpfilename, filepath)
            try:
                # Carry the server's Last-Modified timestamp to the final file.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
            exec_time_end = time.time()
            # BUG FIX: duration was start - end (negative); use end - start.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
            if(os.path.exists(tmpfilename)):
                os.remove(tmpfilename)
            # BUG FIX: the dict literal carried a duplicate 'Method' key; the
            # 'Method': pretmpfilename['Method'] entry was dead (silently
            # overwritten by 'Method': httpmethod).  Keep the effective entry.
            returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        if(outfile=="-"):
            pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
            if(not pretmpfilename):
                return False
            tmpfilename = pretmpfilename['Filename']
            downloadsize = int(os.path.getsize(tmpfilename))
            fulldatasize = 0
            prevdownsize = 0
            exec_time_start = time.time()
            # Stream the temp file back into memory chunk by chunk, logging
            # copy progress, then delete the temp file.
            with open(tmpfilename, 'rb') as ft:
                f = BytesIO()
                while True:
                    databytes = ft.read(buffersize[1])
                    if not databytes: break
                    datasize = len(databytes)
                    fulldatasize = datasize + fulldatasize
                    percentage = ""
                    if(downloadsize>0):
                        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                    f.write(databytes)
                f.seek(0)
                fdata = f.getvalue()
                f.close()
            os.remove(tmpfilename)
            exec_time_end = time.time()
            # BUG FIX: duration was start - end (negative); use end - start.
            log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
            # BUG FIX: duplicate 'Method' key removed here as well.
            returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback to-file entry point: delegate to the urllib implementation.

    NOTE(review): expected to sit under a not-havepycurl guard in the full
    file; assumes the urllib variant shares this family's parameter names --
    confirm against its definition.
    """
    # BUG FIX: arguments were previously passed positionally in the wrong
    # order (buffersize before outfile/outpath, ranges dropped entirely);
    # pass by keyword so every value binds to the parameter of the same name.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders=httpheaders, httpuseragent=httpuseragent, httpreferer=httpreferer, httpcookie=httpcookie, httpmethod=httpmethod, postdata=postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    # BUG FIX: the result was computed but never returned.
    return returnval
# When the installed pycurl supports HTTP/2 but not HTTP/3, install a
# fallback wrapper.
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0") and hasattr(pycurl, "CURL_HTTP_VERSION_2_0")):
    def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # NOTE(review): this function calls itself, so any invocation would
        # recurse until RecursionError.  Under this guard (HTTP/2 available,
        # HTTP/3 not) the wrapper was presumably meant to define the
        # *_pycurl3 entry point delegating to the native pycurl2
        # implementation -- confirm against upstream.  Also: the delegation
        # is positional and passes buffersize where the callee expects
        # outfile (and drops ranges), and no return statement is visible in
        # this extract.
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
# When the installed pycurl supports neither HTTP/2 nor HTTP/3, install a
# fallback wrapper.
if(havepycurl and not hasattr(pycurl, "CURL_HTTP_VERSION_2_0") and not hasattr(pycurl, "CURL_HTTP_VERSION_3_0")):
    def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # NOTE(review): this function calls itself (infinite recursion if
        # invoked); under this guard it presumably should define the
        # *_pycurl2/*_pycurl3 aliases delegating to the plain HTTP/1.1
        # pycurl implementation -- confirm against upstream.  The positional
        # delegation also passes buffersize where the callee expects
        # outfile, and no return statement is visible in this extract.
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
def download_file_from_ftp_file(url):
    # Fetch the file named by an ftp:// or ftps:// URL and return it as an
    # in-memory file object (BytesIO).  NOTE(review): several statements of
    # this function were lost when this extract was made (marked below); the
    # code as shown is not complete.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    # NOTE(review): the `else:` header for the anonymous default is missing.
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    # NOTE(review): the FTP()/FTP_TLS() construction statements for these
    # two scheme branches are missing from the extract.
    if(urlparts.scheme=="ftp"):
    elif(urlparts.scheme=="ftps"):
    # Reject plain HTTP(S) URLs (the rejection statement itself is missing).
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
    ftp_port = urlparts.port;
    if(urlparts.port is None):
    # NOTE(review): the default-port assignment and the `try:` opening the
    # connect block are missing.
    ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # NOTE(review): `httpurl` is not a name defined in this function
        # (the parameter is `url`); this call would raise NameError.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    ftp.login(urlparts.username, urlparts.password);
    # Presumably switches to a protected data channel for FTPS; the
    # statement under this condition is missing.
    if(urlparts.scheme=="ftps"):
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    # NOTE(review): the trailing cleanup and `return` statements are missing.
def download_file_from_ftp_string(url):
    """Download *url* over FTP and return the transferred bytes."""
    return download_file_from_ftp_file(url).read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl over FTP into memory and return a result dict
    # describing the transfer.  NOTE(review): this extract is missing
    # several statements (loop header, counter initialisation, seek/return),
    # marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # NOTE(review): the guard (presumably `if(sleep<0):`) is missing; as
    # shown the caller-supplied sleep would always be overwritten.
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # NOTE(review): the `else:` header is missing, and this updates
        # `httpuseragent` (a string) instead of `httpheaders` -- it would
        # raise AttributeError if reached; presumably httpheaders.update.
        httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # NOTE(review): same problem -- should presumably be
        # httpheaders.update({'Referer': httpreferer}).
        httpuseragent.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_ftp_file(httpurl);
    if(not geturls_text):
        # NOTE(review): failure-return statement missing.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # NOTE(review): initialisation of fulldatasize/prevdownsize is missing.
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # NOTE(review): the read-loop header (e.g. `while True:`) is missing.
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # NOTE(review): a rewind (strbuf.seek(0, 0)) is missing; without it
        # the read() below returns b"".
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    # NOTE(review): final `return returnval;` is missing.
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl over FTP into a uniquely named temporary file and
    # return a result dict describing that file.  NOTE(review): several
    # statements were lost in extraction (marked below).
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start
    # time (bytes on Py2, encoded text on Py3).
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # NOTE(review): failure-return statement missing.
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # NOTE(review): the `try:` opening this timestamp block is missing.
        # The FTP result dict carries 'Headers': None, so the .get() chain
        # raises AttributeError and relies on the handler below.
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # NOTE(review): the inner `try:` header and its excepts are
            # missing here as well.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # NOTE(review): final `return returnval;` is missing.  Also note
    # exec_time_start - exec_time_end is negative; presumably the operands
    # are meant the other way around -- confirm against hms_string().
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # Download httpurl over FTP either to a file under outpath (when
    # outfile != "-") or into memory (when outfile == "-").  NOTE(review):
    # this extract is missing a number of statements, marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # NOTE(review): failure-return statement missing.
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # NOTE(review): failure-return statement missing.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # NOTE(review): failure-return statement missing.
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): the dict below lists the 'Method' key twice; the
        # second entry (None) silently overwrites pretmpfilename['Method'].
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    # NOTE(review): the `else:` header for the in-memory branch is missing.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        exec_time_start = time.time();
        # NOTE(review): the surrounding `with BytesIO() as f:` (which the
        # `f.getvalue()` below refers to) and the read-loop header are
        # missing; counters fulldatasize/prevdownsize are never initialised
        # in this extract.
        with open(tmpfilename, 'rb') as ft:
            databytes = ft.read(buffersize[1]);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
        fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # NOTE(review): duplicate 'Method' key here as well (the httpmethod
        # value overwrites pretmpfilename['Method']).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    # NOTE(review): final `return returnval;` is missing.
def upload_file_to_ftp_file(ftpfile, url):
    # Upload the already-open file object `ftpfile` to the path named by an
    # ftp:// or ftps:// URL.  NOTE(review): several statements were lost in
    # extraction (marked below).
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    # NOTE(review): the `else:` header for the anonymous default is missing.
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    # NOTE(review): the FTP()/FTP_TLS() construction statements are missing.
    if(urlparts.scheme=="ftp"):
    elif(urlparts.scheme=="ftps"):
    # Reject HTTP(S) URLs (the rejection statement itself is missing).
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
    ftp_port = urlparts.port;
    if(urlparts.port is None):
    # NOTE(review): default-port assignment and the `try:` opening the
    # connect block are missing.
    ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        # NOTE(review): `httpurl` is undefined in this function; presumably
        # should be `url` -- would raise NameError if reached.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    ftp.login(urlparts.username, urlparts.password);
    # Presumably switches to a protected data channel for FTPS; statement
    # under this condition is missing.
    if(urlparts.scheme=="ftps"):
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    # NOTE(review): trailing cleanup and `return` statements are missing.
def upload_file_to_ftp_string(ftpstring, url):
    # Upload a byte string to an FTP URL by wrapping it in a BytesIO and
    # delegating to upload_file_to_ftp_file().
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    # NOTE(review): the closing/return statements are missing from this
    # extract; as shown the result is discarded.
def download_file_from_sftp_file(url):
    # Fetch an sftp:// URL via paramiko and return its contents in a BytesIO.
    # NOTE(review): several statements were lost in extraction (marked).
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # Reject HTTP(S) URLs (the rejection statement itself is missing).
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
    sftp_port = urlparts.port;
    if(urlparts.port is None):
    # NOTE(review): the default-port assignment (presumably 22) and the
    # `else:` header for the re-assignment below are missing.
    sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    # NOTE(review): `else:` header missing.
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    # Reject non-SFTP schemes (rejection statement missing).
    if(urlparts.scheme!="sftp"):
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    # Auto-accepting unknown host keys disables host verification; worth
    # noting for anything security-sensitive.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    # NOTE(review): the `try:` opening this connect block is missing.
    ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        # NOTE(review): handler body missing.
    except socket.gaierror:
        # NOTE(review): `httpurl` is undefined here; presumably `url`.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    sftp = ssh.open_sftp();
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    # Rewind so callers can read from the start.
    sftpfile.seek(0, 0);
    # NOTE(review): connection close and `return sftpfile;` are missing.
# NOTE(review): fallback redefinition -- upstream presumably guards this
# with `if(not haveparamiko):`; the guard and the stub body (likely
# `return False;`) were lost in extraction, leaving only the signature.
def download_file_from_sftp_file(url):
def download_file_from_sftp_string(url):
    """Download *url* over SFTP and return the transferred bytes."""
    return download_file_from_sftp_file(url).read()
# NOTE(review): fallback stub; its guard and body were lost in extraction.
# The name download_file_from_ftp_string in an SFTP fallback section looks
# suspicious -- presumably download_file_from_sftp_string; confirm upstream.
def download_file_from_ftp_string(url):
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl over SFTP into memory and return a result dict.
    # NOTE(review): this extract is missing several statements, marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        # NOTE(review): `else:` header missing; also updates the string
        # `httpuseragent` instead of `httpheaders` -- AttributeError if
        # reached.
        httpuseragent.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        # NOTE(review): same problem -- presumably httpheaders.update.
        httpuseragent.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_sftp_file(httpurl);
    if(not geturls_text):
        # NOTE(review): failure-return statement missing.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # NOTE(review): initialisation of fulldatasize/prevdownsize is missing.
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # NOTE(review): read-loop header (e.g. `while True:`) is missing.
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # NOTE(review): rewind (strbuf.seek(0, 0)) missing before read().
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    # NOTE(review): final `return returnval;` is missing.
# If paramiko is unavailable, replace the SFTP downloader with a stub.
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): the stub body (presumably `return False;`) is
        # missing from this extract.
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl over SFTP into a uniquely named temporary file and
    # return a result dict describing that file.  NOTE(review): several
    # statements were lost in extraction (marked below).
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start
    # time (bytes on Py2, encoded text on Py3).
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # NOTE(review): failure-return statement missing.
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # NOTE(review): the `try:` opening this timestamp block is missing.
        # The SFTP result dict carries 'Headers': None, so the .get() chain
        # raises AttributeError and relies on the handler below.
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # NOTE(review): inner `try:` header and its excepts are missing.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # NOTE(review): final `return returnval;` is missing.
# If paramiko is unavailable, replace the SFTP file downloader with a stub.
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        # NOTE(review): the stub body (presumably `return False;`) is
        # missing from this extract.
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    # Download httpurl over SFTP either to a file under outpath (when
    # outfile != "-") or into memory (when outfile == "-").  NOTE(review):
    # this extract is missing a number of statements, marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # NOTE(review): failure-return statement missing.
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # NOTE(review): failure-return statement missing.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            # NOTE(review): failure-return statement missing.
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # NOTE(review): the dict below lists the 'Method' key twice; the
        # second entry (None) silently overwrites pretmpfilename['Method'].
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    # NOTE(review): the `else:` header for the in-memory branch is missing.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        exec_time_start = time.time();
        # NOTE(review): the surrounding `with BytesIO() as f:` (which the
        # `f.getvalue()` below refers to) and the read-loop header are
        # missing; counters fulldatasize/prevdownsize are never initialised
        # in this extract.
        with open(tmpfilename, 'rb') as ft:
            databytes = ft.read(buffersize[1]);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
        fdata = f.getvalue();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # NOTE(review): duplicate 'Method' key here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    # NOTE(review): final `return returnval;` is missing.
# If paramiko is unavailable, replace the SFTP to-file downloader with a
# stub.
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        # NOTE(review): the stub body (presumably `return False;`) is
        # missing from this extract.
def upload_file_to_sftp_file(sftpfile, url):
    # Upload the already-open file object `sftpfile` to the path named by
    # an sftp:// URL using paramiko.  NOTE(review): several statements were
    # lost in extraction (marked below).
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    # Reject HTTP(S) URLs (the rejection statement itself is missing).
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
    if(urlparts.port is None):
    # NOTE(review): default-port assignment (presumably 22) and the `else:`
    # header for the re-assignment below are missing.
    sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    # NOTE(review): `else:` header missing.
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    # Reject non-SFTP schemes (rejection statement missing).
    if(urlparts.scheme!="sftp"):
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    # Auto-accepting unknown host keys disables host verification.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    # NOTE(review): the `try:` opening this connect block is missing.
    ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        # NOTE(review): handler body missing.
    except socket.gaierror:
        # NOTE(review): `httpurl` is undefined here; presumably `url`.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    # Rewind the source so callers can reuse it.
    sftpfile.seek(0, 0);
    # NOTE(review): connection close and `return` statements are missing.
# NOTE(review): fallback redefinition -- upstream presumably guards this
# with `if(not haveparamiko):`; the guard and stub body (likely
# `return False;`) were lost in extraction, leaving only the signature.
def upload_file_to_sftp_file(sftpfile, url):
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the byte string *sftpstring* to *url* over SFTP.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_sftp_file;
    returns whatever that call returns.
    """
    sftpfileo = BytesIO(sftpstring)
    # Fix: the original called the nonexistent name upload_file_to_sftp_files
    # (trailing "s" typo) and passed the undefined variable `ftpfileo`
    # instead of `sftpfileo`, so every call raised NameError.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
# NOTE(review): fallback stub; its guard (presumably `if(not haveparamiko):`)
# and body were lost in extraction.  Signature differs from the primary
# definition (missing the string parameter) -- confirm against upstream.
def upload_file_to_sftp_string(url):
def download_file_from_pysftp_file(url):
    # Fetch an sftp:// URL using pysftp and return its contents in a
    # BytesIO.  NOTE(review): several statements are missing from this
    # extract and the visible code has real defects, marked below.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    # Reject HTTP(S) URLs (the rejection statement itself is missing).
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
    sftp_port = urlparts.port;
    if(urlparts.port is None):
    # NOTE(review): default-port assignment (presumably 22) and the `else:`
    # header for the re-assignment below are missing.
    sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    # NOTE(review): `else:` header missing.
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    # Reject non-SFTP schemes (rejection statement missing).
    if(urlparts.scheme!="sftp"):
    # NOTE(review): the Connection object is never assigned, and `ssh`
    # below is undefined in this function.  pysftp.Connection exposes
    # getfo() itself and has no open_sftp() method; this body looks copied
    # from the paramiko variant -- confirm against upstream.  The `try:`
    # opening this block is also missing.
    pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        # NOTE(review): handler body missing.
    except socket.gaierror:
        # NOTE(review): `httpurl` is undefined here; presumably `url`.
        log.info("Error With URL "+httpurl);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
    sftp = ssh.open_sftp();
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    # Rewind so callers can read from the start.
    sftpfile.seek(0, 0);
    # NOTE(review): connection close and `return sftpfile;` are missing.
# NOTE(review): fallback redefinition -- upstream presumably guards this
# with `if(not havepysftp):`; the guard and stub body (likely
# `return False;`) were lost in extraction, leaving only the signature.
def download_file_from_pysftp_file(url):
def download_file_from_pysftp_string(url):
    """Download *url* via pysftp and return the transferred bytes."""
    return download_file_from_pysftp_file(url).read()
# NOTE(review): fallback stub; its guard and body were lost in extraction.
# The name download_file_from_ftp_string in the pysftp fallback section
# looks suspicious -- presumably download_file_from_pysftp_string; confirm
# against upstream.
def download_file_from_ftp_string(url):
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    # pysftp variant of download_from_url_with_sftp; downloads into memory
    # and returns a result dict.  Unlike the other variants this signature
    # takes no httpuseragent/httpreferer.  NOTE(review): several statements
    # are missing from this extract, marked below.
    global geturls_download_sleep, havezstd, havebrotli;
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        # NOTE(review): failure-return statement missing.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    # NOTE(review): initialisation of fulldatasize/prevdownsize is missing.
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        # NOTE(review): read-loop header (e.g. `while True:`) is missing.
        databytes = geturls_text.read(buffersize);
        if not databytes: break;
        datasize = len(databytes);
        fulldatasize = datasize + fulldatasize;
        percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
        downloaddiff = fulldatasize - prevdownsize;
        log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
        prevdownsize = fulldatasize;
        strbuf.write(databytes);
        # NOTE(review): rewind (strbuf.seek(0, 0)) missing before read().
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    # NOTE(review): final `return returnval;` is missing.
# NOTE(review): fallback redefinition -- upstream presumably guards this
# with `if(not havepysftp):`; the guard and stub body (likely
# `return False;`) were lost in extraction, leaving only the signature.
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    # Download httpurl via pysftp into a uniquely named temporary file and
    # return a result dict describing that file.  NOTE(review): several
    # statements were lost in extraction (marked below).
    global geturls_download_sleep, havezstd, havebrotli, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start
    # time (bytes on Py2, encoded text on Py3).
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    # NOTE(review): missing guard (presumably `if(sleep<0):`).
    sleep = geturls_download_sleep;
    # NOTE(review): httpuseragent and httpreferer are not parameters of this
    # function (the pysftp signature omits them), so this call raises
    # NameError if reached -- the argument list looks copied from the
    # paramiko variant; confirm against upstream.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        # NOTE(review): failure-return statement missing.
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # NOTE(review): the `try:` opening this timestamp block is missing.
        # The pysftp result dict carries 'Headers': None, so the .get()
        # chain raises AttributeError and relies on the handler below.
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # NOTE(review): inner `try:` header and its excepts are missing.
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    # NOTE(review): final `return returnval;` is missing.
5209 def download_from_url_file_with_pysftp(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an sftp:// URL via pysftp.

    If outfile is "-", the downloaded data is returned in memory
    ('Type': "Content"); otherwise the temp download is moved to
    outpath/outfile ('Type': "File").  Returns a result dict, or
    False on any failure.

    Parameters mirror the sibling download_from_url_* helpers:
    buffersize is a [download, copy] pair; sleep < 0 means "use the
    module default"; ranges is passed through to the file downloader.
    (The list defaults are never mutated here, so sharing them across
    calls is safe.)
    """
    global geturls_download_sleep, havezstd, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Save to a real file: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            # outpath must be a directory, not an existing file
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            # target name collides with an existing directory
            return False
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start; the original subtracted the
        # other way round and logged/returned a negative duration.
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUGFIX: the original dict listed the 'Method' key twice, the second
        # time as None, which silently discarded the real HTTP method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                     'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_end - exec_time_start),
                     'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'],
                     'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'],
                     'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # Stream the downloaded temp file back into memory and delete it.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                # downloadsize > 0 is guaranteed here: the loop body only runs
                # when the file produced at least one byte.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: positive elapsed time (see note above).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                        'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_end - exec_time_start),
                     'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start),
                     'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'],
                     'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'],
                     'Reason': pretmpfilename['Reason']}
    return returnval
5274 def download_from_url_to_file_with_pysftp(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), ranges
=[None, None], buffersize
=[524288, 524288], sleep
=-1, timeout
=10):
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path named by an
    sftp:// URL using pysftp.

    Returns sftpfile (rewound to offset 0) on success, or False when the
    URL scheme is not sftp or the connection fails.
    """
    urlparts = urlparse.urlparse(url)
    sftp_port = urlparts.port
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    if urlparts.port is None:
        sftp_port = 22  # default SSH/SFTP port when the URL omits one
    else:
        sftp_port = urlparts.port
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    try:
        # BUGFIX: keep the connection object — the original discarded the
        # pysftp.Connection(...) result and later used an undefined name
        # "ssh" (guaranteed NameError).  Also use the resolved
        # sftp_username/sftp_password, which the original computed but
        # never used (breaking anonymous fallback).
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: log the actual parameter "url"; the original referenced
        # an undefined name "httpurl".
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()  # BUGFIX: close the connection instead of leaking it
    sftpfile.seek(0, 0)
    return sftpfile
5318 def upload_file_to_pysftp_file(sftpfile
, url
):
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload a bytes payload to an sftp:// URL by wrapping it in an
    in-memory buffer and delegating to upload_file_to_pysftp_file.

    Returns whatever upload_file_to_pysftp_file returns (the buffer on
    success, False on failure).
    """
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: the original called the undefined name
    # "upload_file_to_pysftp_files" (trailing "s") and passed the
    # undefined variable "ftpfileo" instead of sftpfileo — both were
    # guaranteed NameErrors.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    # NOTE(review): closing the buffer matches the file's sibling
    # upload_*_string helpers, but it means a returned buffer is already
    # closed — callers appear to use only the truthiness of the result.
    sftpfileo.close()
    return sftpfile
5328 def upload_file_to_pysftp_string(url
):