4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 10/5/2023 Ver. 2.0.2 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, zlib
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
62 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
90 if(sys
.version
[0]=="2"):
92 from io
import StringIO
, BytesIO
;
95 from cStringIO
import StringIO
;
96 from cStringIO
import StringIO
as BytesIO
;
98 from StringIO
import StringIO
;
99 from StringIO
import StringIO
as BytesIO
;
100 # From http://python-future.org/compatible_idioms.html
101 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
102 from urllib
import urlencode
;
103 from urllib
import urlopen
as urlopenalt
;
104 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
105 import urlparse
, cookielib
;
106 from httplib
import HTTPConnection
, HTTPSConnection
;
107 if(sys
.version
[0]>="3"):
108 from io
import StringIO
, BytesIO
;
109 # From http://python-future.org/compatible_idioms.html
110 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
111 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
112 from urllib
.error
import HTTPError
, URLError
;
113 import urllib
.parse
as urlparse
;
114 import http
.cookiejar
as cookielib
;
115 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Program identity constants used for User-Agent strings and CLI output.
__program_name__ = "PyWWW-Get"
__program_alt_name__ = "PyWWWGet"
__program_small_name__ = "wwwget"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, patch, pre-release tag, pre-release number)
__version_info__ = (2, 0, 2, "RC 1", 1)
# (year, month, day, pre-release tag, pre-release number)
__version_date_info__ = (2023, 10, 5, "RC 1", 1)
# Zero-padded release date, e.g. "2023.10.05".
__version_date__ = "{0}.{1:02d}.{2:02d}".format(__version_date_info__[0], __version_date_info__[1], __version_date_info__[2])
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"
# Release-date string with the pre-release number appended when one is set.
if __version_info__[4] is not None:
    __version_date_plusrc__ = __version_date__ + "-" + str(__version_date_info__[4])
else:
    __version_date_plusrc__ = __version_date__
# Human-readable version, e.g. "2.0.2 RC 1".
if __version_info__[3] is not None:
    __version__ = "{0}.{1}.{2} {3}".format(__version_info__[0], __version_info__[1], __version_info__[2], __version_info__[3])
else:
    __version__ = "{0}.{1}.{2}".format(__version_info__[0], __version_info__[1], __version_info__[2])
# Prefix for temporary download files, e.g. "py3wwwget2-".
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
# System temp directory those files are created in.
pytempdir = tempfile.gettempdir();

# NOTE(review): platform.architecture() returns a tuple like ('64bit', 'ELF'),
# not a string, so downstream string comparisons against "32bit"/"64bit" can
# never match -- confirm whether platform.architecture()[0] was intended.
PyBitness = platform.architecture();
141 if(PyBitness
=="32bit" or PyBitness
=="32"):
143 elif(PyBitness
=="64bit" or PyBitness
=="64"):
148 compression_supported
= "gzip, deflate";
150 compression_supported
= "gzip, deflate, br";
152 compression_supported
= "gzip, deflate";
# Shared CookieJar used as the default cookie store for all downloads.
geturls_cj = cookielib.CookieJar();
# Windows User-Agent strings and the matching Client Hints header dicts.
# BUGFIX: each *_ua_addon dict originally listed the 'SEC-CH-UA-PLATFORM'
# key twice, so the platform name "Windows" was silently overwritten by the
# version number (later keys win in a dict literal).  The second occurrence
# is now the intended 'SEC-CH-UA-PLATFORM-VERSION' key.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# BUGFIX: platform version was "5.1.0" (copy-paste); NT 5.2 is XP x64.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Browser User-Agent strings, all claiming the Windows 7 x64 platform string
# defined above.  Browser version numbers are frozen at the releases current
# as of this file's last update (2023-10).
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Honest self-identifying User-Agent for this tool.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Interpreter name for the alternate UA; falls back to "Python" when
# platform.python_implementation() reports an empty string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
# Alternate self-identifying UA that also reports OS, machine and interpreter.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot impersonation strings (current and legacy formats).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used module-wide.
geturls_ua = geturls_ua_firefox_windows7;
# Full request-header dicts for each impersonated browser.  Chromium-family
# entries also carry SEC-CH-UA client-hint headers and are augmented with the
# Windows 7 *_ua_addon dict via .update() just after their definition.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# NOTE(review): this statement has no trailing semicolon unlike its siblings;
# harmless in Python, kept as-is.
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Header dicts for this tool's own (self-identifying) User-Agents.
# BUGFIX: both dicts originally listed 'SEC-CH-UA-PLATFORM' twice, so the
# interpreter name was silently overwritten by str(__version__); the second
# occurrence is now 'SEC-CH-UA-PLATFORM-VERSION' (the version value itself
# is preserved from the original).
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
# Header dicts for the Googlebot impersonation User-Agents.
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide defaults: headers sent with every request and the pause (in
# seconds) inserted between chunked reads; 0 disables the pause.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
212 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
213 if(outtype
=="print" and dbgenable
):
216 elif(outtype
=="log" and dbgenable
):
217 logging
.info(dbgtxt
);
219 elif(outtype
=="warning" and dbgenable
):
220 logging
.warning(dbgtxt
);
222 elif(outtype
=="error" and dbgenable
):
223 logging
.error(dbgtxt
);
225 elif(outtype
=="critical" and dbgenable
):
226 logging
.critical(dbgtxt
);
228 elif(outtype
=="exception" and dbgenable
):
229 logging
.exception(dbgtxt
);
231 elif(outtype
=="logalt" and dbgenable
):
232 logging
.log(dgblevel
, dbgtxt
);
234 elif(outtype
=="debug" and dbgenable
):
235 logging
.debug(dbgtxt
);
243 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
244 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
249 def add_url_param(url
, **params
):
251 parts
= list(urlparse
.urlsplit(url
));
252 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
254 parts
[n
]=urlencode(d
);
255 return urlparse
.urlunsplit(parts
);
# Extend PATH with this script's directory and the current working directory
# so helper executables placed alongside the script can be located by
# which_exec() below.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search the directories in os.environ["PATH"] for *execfile*.

    Returns the full path of the first match, or None when not found.

    BUGFIX: the original split PATH on a hard-coded ":" and joined with "/",
    which breaks on Windows where the separator is ";" -- os.pathsep and
    os.path.join are used instead (identical behaviour on POSIX).
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
263 def listize(varlist
):
271 newlistreg
.update({ilx
: varlist
[il
]});
272 newlistrev
.update({varlist
[il
]: ilx
});
275 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
278 def twolistize(varlist
):
288 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
289 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
290 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
291 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
294 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
295 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
296 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
299 def arglistize(proexec
, *varlist
):
303 newarglist
= [proexec
];
305 if varlist
[il
][0] is not None:
306 newarglist
.append(varlist
[il
][0]);
307 if varlist
[il
][1] is not None:
308 newarglist
.append(varlist
[il
][1]);
312 def fix_header_names(header_dict
):
313 if(sys
.version
[0]=="2"):
314 header_dict
= {k
.title(): v
for k
, v
in header_dict
.iteritems()};
315 if(sys
.version
[0]>="3"):
316 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Render an elapsed time in seconds as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / 3600)
    minutes = int((sec_elapsed % 3600) / 60)
    seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
327 # get_readable_size by Lipis
328 # http://stackoverflow.com/posts/14998888/revisions
329 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
331 if(unit
!="IEC" and unit
!="SI"):
334 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
335 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
338 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
339 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
344 if abs(bytes
) < unitsize
:
345 strformat
= "%3."+str(precision
)+"f%s";
346 pre_return_val
= (strformat
% (bytes
, unit
));
347 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
348 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
349 alt_return_val
= pre_return_val
.split();
350 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
353 strformat
= "%."+str(precision
)+"f%s";
354 pre_return_val
= (strformat
% (bytes
, "YiB"));
355 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
356 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
357 alt_return_val
= pre_return_val
.split();
358 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
361 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
363 usehashtypes
= usehashtypes
.lower();
364 getfilesize
= os
.path
.getsize(infile
);
365 return_val
= get_readable_size(getfilesize
, precision
, unit
);
367 hashtypelist
= usehashtypes
.split(",");
368 openfile
= open(infile
, "rb");
369 filecontents
= openfile
.read();
372 listnumend
= len(hashtypelist
);
373 while(listnumcount
< listnumend
):
374 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
375 hashtypelistup
= hashtypelistlow
.upper();
376 filehash
= hashlib
.new(hashtypelistup
);
377 filehash
.update(filecontents
);
378 filegethash
= filehash
.hexdigest();
379 return_val
.update({hashtypelistup
: filegethash
});
383 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
385 usehashtypes
= usehashtypes
.lower();
386 getfilesize
= len(instring
);
387 return_val
= get_readable_size(getfilesize
, precision
, unit
);
389 hashtypelist
= usehashtypes
.split(",");
391 listnumend
= len(hashtypelist
);
392 while(listnumcount
< listnumend
):
393 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
394 hashtypelistup
= hashtypelistlow
.upper();
395 filehash
= hashlib
.new(hashtypelistup
);
396 if(sys
.version
[0]=="2"):
397 filehash
.update(instring
);
398 if(sys
.version
[0]>="3"):
399 filehash
.update(instring
.encode('utf-8'));
400 filegethash
= filehash
.hexdigest();
401 return_val
.update({hashtypelistup
: filegethash
});
405 def http_status_to_reason(code
):
408 101: 'Switching Protocols',
413 203: 'Non-Authoritative Information',
415 205: 'Reset Content',
416 206: 'Partial Content',
418 208: 'Already Reported',
420 300: 'Multiple Choices',
421 301: 'Moved Permanently',
426 307: 'Temporary Redirect',
427 308: 'Permanent Redirect',
430 402: 'Payment Required',
433 405: 'Method Not Allowed',
434 406: 'Not Acceptable',
435 407: 'Proxy Authentication Required',
436 408: 'Request Timeout',
439 411: 'Length Required',
440 412: 'Precondition Failed',
441 413: 'Payload Too Large',
443 415: 'Unsupported Media Type',
444 416: 'Range Not Satisfiable',
445 417: 'Expectation Failed',
446 421: 'Misdirected Request',
447 422: 'Unprocessable Entity',
449 424: 'Failed Dependency',
450 426: 'Upgrade Required',
451 428: 'Precondition Required',
452 429: 'Too Many Requests',
453 431: 'Request Header Fields Too Large',
454 451: 'Unavailable For Legal Reasons',
455 500: 'Internal Server Error',
456 501: 'Not Implemented',
458 503: 'Service Unavailable',
459 504: 'Gateway Timeout',
460 505: 'HTTP Version Not Supported',
461 506: 'Variant Also Negotiates',
462 507: 'Insufficient Storage',
463 508: 'Loop Detected',
465 511: 'Network Authentication Required'
467 return reasons
.get(code
, 'Unknown Status Code');
469 def ftp_status_to_reason(code
):
471 110: 'Restart marker reply',
472 120: 'Service ready in nnn minutes',
473 125: 'Data connection already open; transfer starting',
474 150: 'File status okay; about to open data connection',
476 202: 'Command not implemented, superfluous at this site',
477 211: 'System status, or system help reply',
478 212: 'Directory status',
481 215: 'NAME system type',
482 220: 'Service ready for new user',
483 221: 'Service closing control connection',
484 225: 'Data connection open; no transfer in progress',
485 226: 'Closing data connection',
486 227: 'Entering Passive Mode',
487 230: 'User logged in, proceed',
488 250: 'Requested file action okay, completed',
489 257: '"PATHNAME" created',
490 331: 'User name okay, need password',
491 332: 'Need account for login',
492 350: 'Requested file action pending further information',
493 421: 'Service not available, closing control connection',
494 425: 'Can\'t open data connection',
495 426: 'Connection closed; transfer aborted',
496 450: 'Requested file action not taken',
497 451: 'Requested action aborted. Local error in processing',
498 452: 'Requested action not taken. Insufficient storage space in system',
499 500: 'Syntax error, command unrecognized',
500 501: 'Syntax error in parameters or arguments',
501 502: 'Command not implemented',
502 503: 'Bad sequence of commands',
503 504: 'Command not implemented for that parameter',
504 530: 'Not logged in',
505 532: 'Need account for storing files',
506 550: 'Requested action not taken. File unavailable',
507 551: 'Requested action aborted. Page type unknown',
508 552: 'Requested file action aborted. Exceeded storage allocation',
509 553: 'Requested action not taken. File name not allowed'
511 return reasons
.get(code
, 'Unknown Status Code');
513 def sftp_status_to_reason(code
):
517 2: 'SSH_FX_NO_SUCH_FILE',
518 3: 'SSH_FX_PERMISSION_DENIED',
520 5: 'SSH_FX_BAD_MESSAGE',
521 6: 'SSH_FX_NO_CONNECTION',
522 7: 'SSH_FX_CONNECTION_LOST',
523 8: 'SSH_FX_OP_UNSUPPORTED'
525 return reasons
.get(code
, 'Unknown Status Code');
527 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
528 if isinstance(headers
, dict):
530 if(sys
.version
[0]=="2"):
531 for headkey
, headvalue
in headers
.iteritems():
532 returnval
.append((headkey
, headvalue
));
533 if(sys
.version
[0]>="3"):
534 for headkey
, headvalue
in headers
.items():
535 returnval
.append((headkey
, headvalue
));
536 elif isinstance(headers
, list):
542 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
543 if isinstance(headers
, dict):
545 if(sys
.version
[0]=="2"):
546 for headkey
, headvalue
in headers
.iteritems():
547 returnval
.append(headkey
+": "+headvalue
);
548 if(sys
.version
[0]>="3"):
549 for headkey
, headvalue
in headers
.items():
550 returnval
.append(headkey
+": "+headvalue
);
551 elif isinstance(headers
, list):
557 def make_http_headers_from_pycurl_to_dict(headers
):
559 headers
= headers
.strip().split('\r\n');
560 for header
in headers
:
561 parts
= header
.split(': ', 1)
564 header_dict
[key
.title()] = value
;
567 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
568 if isinstance(headers
, list):
573 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
575 elif isinstance(headers
, dict):
581 def get_httplib_support(checkvalue
=None):
582 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
584 returnval
.append("ftp");
585 returnval
.append("httplib");
587 returnval
.append("httplib2");
588 returnval
.append("urllib");
590 returnval
.append("urllib3");
591 returnval
.append("request3");
592 returnval
.append("request");
594 returnval
.append("requests");
596 returnval
.append("httpx");
597 returnval
.append("httpx2");
599 returnval
.append("mechanize");
601 returnval
.append("pycurl");
602 returnval
.append("pycurl2");
603 returnval
.append("pycurl3");
605 returnval
.append("sftp");
607 returnval
.append("pysftp");
608 if(not checkvalue
is None):
609 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
610 checkvalue
= "urllib";
611 if(checkvalue
=="httplib1"):
612 checkvalue
= "httplib";
613 if(checkvalue
in returnval
):
619 def check_httplib_support(checkvalue
="urllib"):
620 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
621 checkvalue
= "urllib";
622 if(checkvalue
=="httplib1"):
623 checkvalue
= "httplib";
624 returnval
= get_httplib_support(checkvalue
);
627 def get_httplib_support_list():
628 returnval
= get_httplib_support(None);
631 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1, timeout
=10):
632 global geturls_download_sleep
, haverequests
, havemechanize
, havepycurl
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
634 sleep
= geturls_download_sleep
;
637 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
638 httplibuse
= "urllib";
639 if(httplibuse
=="httplib1"):
640 httplibuse
= "httplib";
641 if(not haverequests
and httplibuse
=="requests"):
642 httplibuse
= "urllib";
643 if(not havehttpx
and httplibuse
=="httpx"):
644 httplibuse
= "urllib";
645 if(not havehttpx
and httplibuse
=="httpx2"):
646 httplibuse
= "urllib";
647 if(not havehttpcore
and httplibuse
=="httpcore"):
648 httplibuse
= "urllib";
649 if(not havehttpcore
and httplibuse
=="httpcore2"):
650 httplibuse
= "urllib";
651 if(not havemechanize
and httplibuse
=="mechanize"):
652 httplibuse
= "urllib";
653 if(not havepycurl
and httplibuse
=="pycurl"):
654 httplibuse
= "urllib";
655 if(not havepycurl
and httplibuse
=="pycurl2"):
656 httplibuse
= "urllib";
657 if(not havepycurl
and httplibuse
=="pycurl3"):
658 httplibuse
= "urllib";
659 if(not havehttplib2
and httplibuse
=="httplib2"):
660 httplibuse
= "httplib";
661 if(not haveparamiko
and httplibuse
=="sftp"):
663 if(not havepysftp
and httplibuse
=="pysftp"):
665 if(httplibuse
=="urllib" or httplibuse
=="request"):
666 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
667 elif(httplibuse
=="request"):
668 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
669 elif(httplibuse
=="request3"):
670 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
671 elif(httplibuse
=="httplib"):
672 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
673 elif(httplibuse
=="httplib2"):
674 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
675 elif(httplibuse
=="urllib3" or httplibuse
=="request3"):
676 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
677 elif(httplibuse
=="requests"):
678 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
679 elif(httplibuse
=="httpx"):
680 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
681 elif(httplibuse
=="httpx2"):
682 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
683 elif(httplibuse
=="httpcore"):
684 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
685 elif(httplibuse
=="httpcore2"):
686 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
687 elif(httplibuse
=="mechanize"):
688 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
689 elif(httplibuse
=="pycurl"):
690 returnval
= download_from_url_with_pycurl(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
691 elif(httplibuse
=="pycurl2"):
692 returnval
= download_from_url_with_pycurl2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
693 elif(httplibuse
=="pycurl3"):
694 returnval
= download_from_url_with_pycurl3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
695 elif(httplibuse
=="ftp"):
696 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
697 elif(httplibuse
=="sftp"):
698 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
699 elif(httplibuse
=="pysftp"):
700 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
, timeout
);
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download a URL to a temporary file using the backend named by httplibuse.

    Normalizes backend aliases, falls back to urllib/httplib when the
    requested third-party library is unavailable, then dispatches to the
    matching download_from_url_file_with_* helper.

    Returns the helper's result dict, or False for an unusable backend.
    """
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    # Fix: only fall back to the module default when the caller did not
    # supply a sleep value; previously the parameter was always clobbered.
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize backend aliases.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Degrade gracefully to a stdlib backend when an optional library is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl2"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl3"):
        httplibuse = "urllib"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    # Fix: pysftp availability was previously tested against haveparamiko.
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    if(httplibuse == "urllib" or httplibuse == "request"):
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "request3"):
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "requests"):
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx"):
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl"):
        returnval = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl2"):
        returnval = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl3"):
        returnval = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download a URL to outpath/outfile (or return content when outfile
    is "-") using the backend named by httplibuse.

    Normalizes backend aliases, falls back to urllib/httplib when the
    requested third-party library is unavailable, then dispatches to the
    matching download_from_url_to_file_with_* helper.

    Returns the helper's result dict, or False for an unusable backend.
    """
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    # Fix: only fall back to the module default when the caller did not
    # supply a sleep value; previously the parameter was always clobbered.
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize backend aliases.
    if(httplibuse == "urllib1" or httplibuse == "urllib2" or httplibuse == "request"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Degrade gracefully to a stdlib backend when an optional library is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl2"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl3"):
        httplibuse = "urllib"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    if(httplibuse == "urllib" or httplibuse == "request"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "request3"):
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    # Fix: the httpx/httpx2/httpcore/httpcore2 branches previously omitted
    # outfile and outpath, unlike every sibling branch.
    elif(httplibuse == "httpx"):
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl"):
        returnval = download_from_url_to_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl2"):
        returnval = download_from_url_to_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pycurl3"):
        returnval = download_from_url_to_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, ranges, buffersize, sleep, timeout)
    else:
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with urllib/urllib2 and return a result dict.

    The dict carries the (decompressed) body under 'Content' plus response
    metadata (headers, HTTP version, method, final URL, status code and
    reason). Returns False on URL/socket errors.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fix: previously called httpuseragent.update(...), mutating the
            # wrong object (a string) instead of the header dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # Fix: same wrong-object bug as User-Agent above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline HTTP Basic credentials from the URL.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    geturls_request = Request(httpurl)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(geturls_request)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(geturls_request, data=postdata)
        else:
            geturls_text = geturls_opener.open(geturls_request)
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry a body/headers; keep reading them.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    try:
        httpcodereason = geturls_text.reason
    except AttributeError:
        httpcodereason = http_status_to_reason(geturls_text.getcode())
    try:
        httpversionout = geturls_text.version
    except AttributeError:
        httpversionout = "1.1"
    httpmethodout = geturls_request.get_method()
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if(sys.version[0] == "2"):
        # Python 2's mimetools message: copy into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: Content-Length may be absent (downloadsize == 0).
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Fix: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    if(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via urllib into a uniquely named temporary file.

    Delegates the fetch to download_from_url_with_urllib, writes the body to
    a NamedTemporaryFile (kept on disk), stamps the file's mtime from the
    Last-Modified header when parseable, and returns a result dict with the
    temp filename, size and download timing. Returns False on fetch failure.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified onto the temp file when present.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # Fix: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via urllib to outpath/outfile.

    When outfile is "-" the body is returned in memory ('Content'); otherwise
    the temp file produced by download_from_url_file_with_urllib is moved to
    the destination ('Filename'). buffersize is [download, copy] chunk sizes.
    Returns a result dict, or False on failure.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # --- save to a real file ---
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Re-apply Last-Modified after the move (move can reset mtime).
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        exec_time_end = time.time()
        # Fix: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # Fix: the original dict literal listed 'Method' twice; the second
        # (httpmethod) silently won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    else:
        # --- return content in memory ---
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # Fix: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # Fix: duplicate 'Method' key removed (httpmethod kept, as before).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with httplib/http.client and return a result dict.

    The dict carries the (decompressed) body under 'Content' plus response
    metadata (headers, HTTP version, method, URL, status code and reason).
    Returns False on connection/timeout errors or unsupported schemes.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fix: previously called httpuseragent.update(...), mutating the
            # wrong object (a string) instead of the header dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # Fix: same wrong-object bug as User-Agent above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline HTTP Basic credentials from the URL.
        if(sys.version[0] == "2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if(sys.version[0] >= "3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1], timeout=timeout)
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1], timeout=timeout)
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # Fix: the POST branch previously issued a "GET" request.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # Fix: HTTPResponse.version is the int 10 or 11, not a string; the old
    # string comparison could never match.
    if(geturls_text.version == 10):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = geturls_text._method
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header object: copy into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: Content-Length may be absent (downloadsize == 0).
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Fix: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0)
        returnval_content = strbuf.read()
    if(httpheaderout.get("Content-Encoding") == "gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    if(httpheaderout.get("Content-Encoding") == "deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via httplib into a uniquely named temporary file.

    Delegates the fetch to download_from_url_with_httplib, writes the body
    to a NamedTemporaryFile (kept on disk), stamps the file's mtime from the
    Last-Modified header when parseable, and returns a result dict with the
    temp filename, size and download timing. Returns False on fetch failure.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Fix: this httplib wrapper previously called the urllib fetcher
    # (copy-paste bug), silently ignoring the requested backend.
    pretmpfilename = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if(not pretmpfilename):
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified onto the temp file when present.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (ValueError, TypeError):
                pass
        except (ValueError, TypeError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # Fix: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httplib backend and deliver the result.

    If outfile is "-" the downloaded bytes are returned in-memory as a
    "Content" dict; otherwise the temporary download is moved to
    outpath/outfile and a "File" dict is returned. Returns False when the
    destination path is invalid or the download fails.

    BUG FIXES vs. original: the result dict listed 'Method' twice (the
    first entry was silently discarded — only the effective httpmethod
    value is kept now), and elapsed time was computed as start-end,
    yielding negative durations; it is now end-start.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Download to a real file on disk.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            # Destination directory name is taken by a regular file.
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            # Destination file name is taken by a directory.
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Propagate the server's Last-Modified header to the file mtime;
        # fall back to manual parsing, and ignore absent/unparsable headers.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # Stream the temporary download back into memory and return the bytes.
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the httplib2 connection classes and return a
    "Content" result dict (body bytes, sizes, headers, status) or False
    on connection errors.

    BUG FIXES vs. original: the POST branch issued the request with the
    verb "GET" instead of "POST", and the User-Agent/Referer fallback
    branches called .update() on the httpuseragent string instead of on
    the httpheaders dict (an AttributeError at runtime).
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Credentials embedded in the URL become an HTTP Basic auth header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout)
    elif urlparts[0] == "https":
        httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout)
    else:
        return False
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif httpmethod == "POST":
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to a plain GET.
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL " + httpurl)
        return False
    except BlockingIOError:
        log.info("Error With URL " + httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if geturls_text.version == "10":
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects are rebuilt into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL " + httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffer back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode compressed bodies; best-effort on failure.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if not havehttplib2:
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib.

        BUG FIX vs. original: the delegated result was assigned but never
        returned, so callers always got None.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httplib2 backend into a uniquely named
    temporary file and return a "File" result dict, or False on failure.

    BUG FIX vs. original: elapsed time was computed as start-end, which
    made DownloadTime negative; it is now end-start.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the file with the server's Last-Modified time when parsable.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        # NOTE(review): writing after os.utime likely resets the mtime set
        # above — preserved as-is to match the original behavior; confirm.
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not havehttplib2:
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib.

        BUG FIX vs. original: the delegated result was assigned but never
        returned, so callers always got None.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the httplib2 backend and deliver the result.

    If outfile is "-" the downloaded bytes are returned in-memory as a
    "Content" dict; otherwise the temporary download is moved to
    outpath/outfile and a "File" dict is returned. Returns False when the
    destination path is invalid or the download fails.

    BUG FIXES vs. original: duplicate 'Method' dict key removed (only the
    effective httpmethod value is kept), and elapsed time is now computed
    as end-start instead of the negative start-end.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Download to a real file on disk.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Propagate the server's Last-Modified header to the file mtime.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # Stream the temporary download back into memory and return the bytes.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if not havehttplib2:
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httplib2 is unavailable: delegate to urllib.

        BUG FIXES vs. original: the delegate was called with positional
        arguments in the wrong order (buffersize landed in the outfile
        slot, shifting every later parameter) — keyword arguments make
        the mapping explicit — and the result was never returned.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias backend "request": delegates directly to the urllib backend.

    BUG FIX vs. original: the delegated result was assigned but never
    returned, so callers always got None.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias backend "request": delegates directly to the urllib backend.

    BUG FIX vs. original: the delegated result was assigned but never
    returned, so callers always got None.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias backend "request": delegates directly to the urllib backend.

    BUG FIXES vs. original: the delegate was called with positional
    arguments in the wrong order (buffersize landed in the outfile slot,
    shifting every later parameter) — keyword arguments make the mapping
    explicit — and the result was never returned.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the requests library and return a "Content"
    result dict (body bytes, sizes, headers, status) or False on
    connection errors.

    BUG FIXES vs. original: `requests.exceptions.ConnectError` does not
    exist (the correct class is ConnectionError), so that except clause
    itself raised AttributeError; and the User-Agent/Referer fallback
    branches called .update() on the httpuseragent string instead of on
    the httpheaders dict.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Credentials embedded in the URL become an HTTP Basic auth header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    reqsession = requests.Session()
    try:
        if httpmethod == "GET":
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        elif httpmethod == "POST":
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except requests.exceptions.ConnectionError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    # NOTE(review): urllib3 exposes raw.version as the int 10/11, so this
    # string comparison never matches — preserved as-is; confirm intent.
    if geturls_text.raw.version == "10":
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects are rebuilt into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL " + httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.raw.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        # Rewind before reading the buffer back out.
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode compressed bodies; best-effort on failure.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16 + zlib.MAX_WBITS)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if not haverequests:
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when requests is unavailable: delegate to urllib.

        BUG FIX vs. original: the delegated result was assigned but never
        returned, so callers always got None.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the requests backend into a uniquely named
    temporary file and return a "File" result dict, or False on failure.

    BUG FIXES vs. original: the signature was missing the defaults
    (geturls_headers/None/geturls_cj) that every sibling backend has —
    restored for consistency and backward compatibility — and elapsed
    time was computed as the negative start-end; it is now end-start.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the file with the server's Last-Modified time when parsable.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        # NOTE(review): writing after os.utime likely resets the mtime set
        # above — preserved as-is to match the original behavior; confirm.
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not haverequests:
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when requests is unavailable: delegate to urllib.

        BUG FIX vs. original: the delegated result was assigned but never
        returned, so callers always got None.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the requests backend and deliver the result.

    If outfile is "-" the downloaded bytes are returned in-memory as a
    "Content" dict; otherwise the temporary download is moved to
    outpath/outfile and a "File" dict is returned. Returns False when the
    destination path is invalid or the download fails.

    BUG FIXES vs. original: duplicate 'Method' dict key removed (only the
    effective httpmethod value is kept), and elapsed time is now computed
    as end-start instead of the negative start-end.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Download to a real file on disk.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Propagate the server's Last-Modified header to the file mtime.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # Stream the temporary download back into memory and return the bytes.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when the requests library is unavailable: delegate
        straight to the urllib implementation with the same arguments."""
        # NOTE(review): ranges is accepted but never forwarded, and buffersize
        # is passed positionally ahead of outfile/outpath -- confirm this
        # ordering against download_from_url_to_file_with_urllib's signature.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the httpx library (HTTP/1.1 only) and return a dict
    describing the response ('Type', 'Content', 'Contentsize', 'ContentsizeAlt',
    'Headers', 'Version', 'Method', 'HeadersSent', 'URL', 'Code', 'Reason'),
    or False when the connection fails."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        # BUGFIX: the original's else-branch called httpuseragent.update({...});
        # httpuseragent is a string, so that raised AttributeError whenever the
        # header was not already present. A direct assignment covers both cases.
        httpheaders['User-Agent'] = httpuseragent;
    if(httpreferer is not None):
        # BUGFIX: same defect -- the else-branch wrongly updated httpuseragent
        # instead of httpheaders.
        httpheaders['Referer'] = httpreferer;
    if(urlparts.username is not None or urlparts.password is not None):
        # Build an HTTP Basic Authorization header from credentials in the URL.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason_phrase;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # On Python 2, rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            # NOTE(review): current httpx releases define Response.read() with
            # no size argument -- confirm this call against the pinned httpx
            # version.
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode the payload based on Content-Encoding; decode
    # failures leave the raw bytes untouched.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    implementation with identical arguments."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpx and spool the payload
    into a uniquely named temporary file. Returns a dict ('Type': "File",
    'Filename', 'Filesize', 'DownloadTime', response metadata, ...) or False
    when the download fails."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    # BUGFIX: stamp the server's Last-Modified time only after the payload has
    # been written and the file closed; the original called os.utime() before
    # f.write(), so the write immediately reset the timestamp it had just set.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
    except AttributeError:
        # parsedate_to_datetime is unavailable (older Python 2); parse manually.
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except ValueError:
            pass;
    except ValueError:
        pass;
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # NOTE(review): exec_time_start - exec_time_end is negative; presumably
    # hms_string() normalizes the sign -- confirm.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    file-download implementation with identical arguments."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httpx either to a named file under outpath
    (outfile != "-") or into memory (outfile == "-"). Returns a result dict
    ('Type': "File"/"Content", payload/location, timing and response metadata)
    or False on failure."""
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # File mode: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the final file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict literal listed 'Method' twice
        # (pretmpfilename['Method'] then httpmethod); only the second survived,
        # so the dead first entry is removed.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        # Stdout/content mode: download to a temp file, copy it into memory.
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUGFIX: dead duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx is unavailable: delegate to the urllib
    to-file implementation."""
    # NOTE(review): ranges is accepted but never forwarded, and buffersize is
    # passed positionally ahead of outfile/outpath -- confirm against
    # download_from_url_to_file_with_urllib's signature.
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with the httpx library with HTTP/2 enabled and return a
    dict describing the response ('Type', 'Content', 'Contentsize',
    'ContentsizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL',
    'Code', 'Reason'), or False when the connection fails."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        # BUGFIX: the original's else-branch called httpuseragent.update({...});
        # httpuseragent is a string, so that raised AttributeError whenever the
        # header was not already present. A direct assignment covers both cases.
        httpheaders['User-Agent'] = httpuseragent;
    if(httpreferer is not None):
        # BUGFIX: same defect -- the else-branch wrongly updated httpuseragent
        # instead of httpheaders.
        httpheaders['Referer'] = httpreferer;
    if(urlparts.username is not None or urlparts.password is not None):
        # Build an HTTP Basic Authorization header from credentials in the URL.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason_phrase;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # On Python 2, rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            # NOTE(review): current httpx releases define Response.read() with
            # no size argument -- confirm this call against the pinned httpx
            # version.
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode the payload based on Content-Encoding; decode
    # failures leave the raw bytes untouched.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx (HTTP/2) is unavailable: delegate to the
    urllib implementation with identical arguments."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via download_from_url_with_httpx2 and spool the payload
    into a uniquely named temporary file. Returns a dict ('Type': "File",
    'Filename', 'Filesize', 'DownloadTime', response metadata, ...) or False
    when the download fails."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    # BUGFIX: stamp the server's Last-Modified time only after the payload has
    # been written and the file closed; the original called os.utime() before
    # f.write(), so the write immediately reset the timestamp it had just set.
    try:
        os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
    except AttributeError:
        # parsedate_to_datetime is unavailable (older Python 2); parse manually.
        try:
            os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
        except ValueError:
            pass;
    except ValueError:
        pass;
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    # NOTE(review): exec_time_start - exec_time_end is negative; presumably
    # hms_string() normalizes the sign -- confirm.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when httpx (HTTP/2) is unavailable: delegate to the
    urllib file-download implementation with identical arguments."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout);
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httpx (HTTP/2) either to a named file under outpath
    (outfile != "-") or into memory (outfile == "-"). Returns a result dict
    ('Type': "File"/"Content", payload/location, timing and response metadata)
    or False on failure."""
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        # File mode: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        # Preserve the server's Last-Modified timestamp on the final file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict literal listed 'Method' twice
        # (pretmpfilename['Method'] then httpmethod); only the second survived,
        # so the dead first entry is removed.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        # Stdout/content mode: download to a temp file, copy it into memory.
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUGFIX: dead duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Fallback used when httpx (HTTP/2) is unavailable: delegate to the
    urllib to-file implementation."""
    # NOTE(review): ranges is accepted but never forwarded, and buffersize is
    # passed positionally ahead of outfile/outpath -- confirm against
    # download_from_url_to_file_with_urllib's signature.
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout);
2303 def download_from_url_with_httpcore(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1, timeout
=10):
2304 global geturls_download_sleep
, havebrotli
;
2306 sleep
= geturls_download_sleep
;
2309 urlparts
= urlparse
.urlparse(httpurl
);
2310 if(isinstance(httpheaders
, list)):
2311 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
2312 httpheaders
= fix_header_names(httpheaders
);
2313 if(httpuseragent
is not None):
2314 if('User-Agent' in httpheaders
):
2315 httpheaders
['User-Agent'] = httpuseragent
;
2317 httpuseragent
.update({'User-Agent': httpuseragent
});
2318 if(httpreferer
is not None):
2319 if('Referer' in httpheaders
):
2320 httpheaders
['Referer'] = httpreferer
;
2322 httpuseragent
.update({'Referer': httpreferer
});
2323 if(urlparts
.username
is not None or urlparts
.password
is not None):
2324 if(sys
.version
[0]=="2"):
2325 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
2326 if(sys
.version
[0]>="3"):
2327 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
2328 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
2330 if(postdata
is not None and not isinstance(postdata
, dict)):
2331 postdata
= urlencode(postdata
);
2333 if(httpmethod
=="GET"):
2334 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
2335 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
2336 elif(httpmethod
=="POST"):
2337 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
2338 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
2340 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
2341 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
2342 except httpcore
.ConnectTimeout
:
2343 log
.info("Error With URL "+httpurl
);
2345 except httpcore
.ConnectError
:
2346 log
.info("Error With URL "+httpurl
);
2348 except socket
.timeout
:
2349 log
.info("Error With URL "+httpurl
);
2351 httpcodeout
= geturls_text
.status
;
2352 httpcodereason
= http_status_to_reason(geturls_text
.status
);
2353 httpversionout
= "1.1";
2354 httpmethodout
= httpmethod
;
2355 httpurlout
= str(httpurl
);
2356 httpheaderout
= geturls_text
.headers
;
2357 httpheadersentout
= httpheaders
;
2358 if(isinstance(httpheaderout
, list)):
2359 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
2360 if(sys
.version
[0]=="2"):
2362 prehttpheaderout
= httpheaderout
;
2363 httpheaderkeys
= httpheaderout
.keys();
2364 imax
= len(httpheaderkeys
);
2368 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
2370 except AttributeError:
2372 httpheaderout
= fix_header_names(httpheaderout
);
2373 if(isinstance(httpheadersentout
, list)):
2374 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
2375 httpheadersentout
= fix_header_names(httpheadersentout
);
2376 log
.info("Downloading URL "+httpurl
);
2377 downloadsize
= httpheaderout
.get('Content-Length');
2378 if(downloadsize
is not None):
2379 downloadsize
= int(downloadsize
);
2380 if downloadsize
is None: downloadsize
= 0;
2383 log
.info("Downloading URL "+httpurl
);
2384 with
BytesIO() as strbuf
:
2386 databytes
= geturls_text
.read(buffersize
);
2387 if not databytes
: break;
2388 datasize
= len(databytes
);
2389 fulldatasize
= datasize
+ fulldatasize
;
2392 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
2393 downloaddiff
= fulldatasize
- prevdownsize
;
2394 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
2395 prevdownsize
= fulldatasize
;
2396 strbuf
.write(databytes
);
2398 returnval_content
= strbuf
.read();
2399 if(httpheaderout
.get("Content-Encoding")=="gzip"):
2401 returnval_content
= zlib
.decompress(returnval_content
, 16+zlib
.MAX_WBITS
);
2404 if(httpheaderout
.get("Content-Encoding")=="deflate"):
2406 returnval_content
= zlib
.decompress(returnval_content
);
2409 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
2411 returnval_content
= brotli
.decompress(returnval_content
);
2412 except brotli
.error
:
2414 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Contentsize': fulldatasize
, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize
, 2, "IEC"), 'SI': get_readable_size(fulldatasize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
};
2415 geturls_text
.close();
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpcore backend and spool the content to a
    named temporary file.

    Returns a dict describing the file ('Filename', 'Filesize', 'Headers',
    'Code', ...) or False when the underlying download failed."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is missing on older Pythons; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
        f.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, which is negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via httpcore into outpath/outfile, or into memory when
    outfile is "-".

    Returns a 'File' result dict (or a 'Content' dict for outfile "-"), or
    False on failure."""
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Mirror the server's Last-Modified timestamp onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice; only the later
        # 'Method': httpmethod entry survived, so that is the one kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with httpcore (HTTP/1.1 and HTTP/2 enabled) and return
    a 'Content' result dict, or False on connection failure."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update method.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update(...); the header dict is the target.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        elif(httpmethod=="POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            # BUGFIX: send a real POST (was "GET") and pass the body through
            # httpcore's `content` keyword (httpcore has no `data` kwarg).
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata.encode("utf-8") if isinstance(postdata, str) else postdata, headers=httpheaders);
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    # BUGFIX: httpcore responses have no reason_phrase attribute; derive the
    # reason from the status code like the sibling httpcore function does.
    httpcodereason = http_status_to_reason(geturls_text.status);
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            # NOTE(review): assumes the response object accepts a sized read — confirm against httpcore's API.
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode compressed bodies; failures leave the raw bytes.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl via the httpcore (HTTP/2-capable) backend and spool the
    content to a named temporary file.

    Returns a dict describing the file ('Filename', 'Filesize', 'Headers',
    'Code', ...) or False when the underlying download failed."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    pretmpfilename = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is missing on older Pythons; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
        f.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, which is negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl via the HTTP/2-capable httpcore backend into
    outpath/outfile, or into memory when outfile is "-".

    Returns a 'File' result dict (or a 'Content' dict for outfile "-"), or
    False on failure."""
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Mirror the server's Last-Modified timestamp onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the dict previously listed 'Method' twice; only the later
        # 'Method': httpmethod entry survived, so that is the one kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, i.e. negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
# NOTE(review): the original guard `if(not havehttpcore):` sits on an elided
# extraction line (original 2779) immediately before this def; it is
# reconstructed here to match the sibling fallback definitions.
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when httpcore is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Alias wrapper: route request3 downloads through the urllib3 backend."""
    returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Alias wrapper: route request3 file downloads through the urllib3 backend."""
    returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        return returnval
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Alias wrapper: route request3 file-saving downloads through urllib3."""
    returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with urllib3's PoolManager and return a 'Content'
    result dict, or False on connection failure."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(timeout<=0):
        timeout = 10;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update method.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update(...); the header dict is the target.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic Authorization header.
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
    urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        elif(httpmethod=="POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.NewConnectionError:
        # BUGFIX: urllib3 has no exceptions.ConnectError; NewConnectionError is
        # what it raises for failed connections.
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except ValueError:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpcodereason = geturls_text.reason;
    # BUGFIX: urllib3 reports the protocol version as the int 10/11, so the old
    # comparison against the string "10" never matched HTTP/1.0 responses.
    if(geturls_text.version==10 or geturls_text.version=="10"):
        httpversionout = "1.0";
    else:
        httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        strbuf.seek(0);
        returnval_content = strbuf.read();
    # Transparently decode compressed bodies; failures leave the raw bytes.
    if(httpheaderout.get("Content-Encoding")=="gzip"):
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="deflate"):
        try:
            returnval_content = zlib.decompress(returnval_content);
        except zlib.error:
            pass;
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        try:
            returnval_content = brotli.decompress(returnval_content);
        except brotli.error:
            pass;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
if not haveurllib3:
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the plain urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the urllib3 backend and spool the body into a named temp file.

    Returns a dict ({'Type': "File", 'Filename': ..., 'Filesize': ..., 'Headers': ..., ...})
    or False when the underlying download failed.
    NOTE(review): `ranges` is accepted only for signature parity with the other backends
    and is not used here; its mutable default is kept for interface compatibility.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # best-effort: missing or unparsable Last-Modified header
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not haveurllib3:
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the plain urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the urllib3 backend to outpath/outfile.

    When outfile is "-" the content is returned in memory instead of written to disk.
    Returns a result dict describing the file (or content) or False on failure.
    buffersize is a pair: [download chunk size, copy chunk size].
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # destination directory path is actually a file
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination file path is actually a directory
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Mirror the server's Last-Modified timestamp onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the dict literal listed 'Method' twice; the later httpmethod entry
        # won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: duplicate 'Method' key removed here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if not haveurllib3:
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when urllib3 is unavailable: delegate to the plain urllib backend."""
        # NOTE(review): the original positional call order (buffersize before outfile,
        # ranges not forwarded) is preserved here — verify it against the signature of
        # download_from_url_to_file_with_urllib, which is defined elsewhere in this file.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with mechanize and return the body plus response metadata.

    Returns {'Type': "Content", 'Content': ..., 'Headers': ..., 'Code': ..., ...}
    or False when the request fails outright.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: previously called .update() on the httpuseragent string
            # (AttributeError at runtime); the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above — was httpuseragent.update(...).
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Inline URL credentials become a Basic Authorization header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            geturls_text = geturls_opener.open(httpurl)
        elif httpmethod == "POST":
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # Keep the error response; its body and headers are still readable.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpcodereason = geturls_text.msg
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    reqhead = geturls_opener.request
    httpheadersentout = reqhead.header_items()
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 returns a message object; flatten it into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode compressed response bodies.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if not havemechanize:
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with the mechanize backend and spool the body into a named temp file.

    Returns a dict ({'Type': "File", 'Filename': ..., 'Filesize': ..., 'Headers': ..., ...})
    or False when the underlying download failed.
    NOTE(review): `ranges` is accepted only for signature parity with the other backends
    and is not used here; its mutable default is kept for interface compatibility.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Python 2 lacks parsedate_to_datetime; fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass  # best-effort: missing or unparsable Last-Modified header
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not havemechanize:
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl with the mechanize backend to outpath/outfile.

    When outfile is "-" the content is returned in memory instead of written to disk.
    Returns a result dict describing the file (or content) or False on failure.
    buffersize is a pair: [download chunk size, copy chunk size].
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # destination directory path is actually a file
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination file path is actually a directory
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Mirror the server's Last-Modified timestamp onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the dict literal listed 'Method' twice; the later httpmethod entry
        # won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIXES: duplicate 'Method' key removed, and 'HeadersSent' was the literal
        # list ['HeadersSent'] instead of the value from the download result.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if not havemechanize:
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Fallback used when mechanize is unavailable: delegate to the urllib backend."""
        # NOTE(review): the original positional call order (buffersize before outfile,
        # ranges not forwarded) is preserved here — verify it against the signature of
        # download_from_url_to_file_with_urllib, which is defined elsewhere in this file.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl with pycurl and return the body plus response metadata.

    Returns {'Type': "Content", 'Content': ..., 'Headers': ..., 'Code': ..., ...}
    or False when the request fails outright.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: previously called .update() on the httpuseragent string
            # (AttributeError at runtime); the header belongs in httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above — was httpuseragent.update(...).
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Inline URL credentials become a Basic Authorization header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        # pycurl wants headers as a list of "Name: value" strings.
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    retrieved_body = BytesIO()
    retrieved_headers = BytesIO()
    try:
        if httpmethod == "GET":
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.perform()
        elif httpmethod == "POST":
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.setopt(geturls_text.POST, True)
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.perform()
        retrieved_headers.seek(0)
        if sys.version[0] == "2":
            pycurlhead = retrieved_headers.read()
        if sys.version[0] >= "3":
            pycurlhead = retrieved_headers.read().decode('UTF-8')
        # Parse "HTTP/x.y CODE REASON" out of the first status line.
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0]
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
        retrieved_body.seek(0)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
    httpversionout = pyhttpverinfo[0]
    httpmethodout = httpmethod
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
    httpheaderout = pycurlheadersout
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
    if sys.version[0] == "2":
        # Python 2 returns a message object; flatten it into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode compressed response bodies.
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
# NOTE(review): the guard line was lost in this chunk; `havepycurl` is inferred
# from the haveurllib3/havemechanize fallback pattern used throughout — confirm
# the flag name against the top of the file.
if not havepycurl:
    def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Fallback used when pycurl is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
        # BUG FIX: the delegated result was computed but never returned.
        return returnval
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl into a uniquely named temporary file.

    Returns a 'File' result dict (filename, sizes, headers, status and
    timing information), or False when the underlying download failed.

    FIXES vs. original:
    - mutable default argument ``ranges=[None, None]`` replaced with a
      None sentinel (``ranges`` is accepted for signature compatibility
      but is not used by this variant);
    - elapsed time was computed as start - end (always negative); now
      end - start;
    - os.path.getsize() was called three times; hoisted into one call.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    if ranges is None:
        ranges = [None, None]
    exec_time_start = time.time()
    # Derive a per-call unique temp-file suffix from URL, buffer size and
    # the start timestamp.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Set mtime/atime from the Last-Modified header.
            # NOTE(review): the later f.write() resets the mtime again -
            # kept as in the original; confirm intended ordering.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime missing (Python 2) or header absent;
            # fall back to strptime parsing of the RFC 1123 date.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    elapsed = exec_time_end - exec_time_start  # FIX: was start - end (negative)
    log.info("It took "+hms_string(elapsed)+" to download file.")
    finalsize = os.path.getsize(tmpfilename)
    returnval.update({'Filesize': finalsize, 'FilesizeAlt': {'IEC': get_readable_size(finalsize, 2, "IEC"), 'SI': get_readable_size(finalsize, 2, "SI")}, 'DownloadTime': float(elapsed), 'DownloadTimeReadable': hms_string(elapsed)})
    return returnval
def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when PycURL is unavailable: delegate to the urllib
    implementation with identical arguments.

    FIX: the default for ``ranges`` was the shared mutable list
    [None, None]; use a None sentinel and build a fresh list per call.
    """
    if ranges is None:
        ranges = [None, None]
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Download httpurl either to outpath/outfile (returns a 'File' result
    dict) or, when outfile == "-", into memory (returns a 'Content' dict).
    Returns False on failure or invalid destination paths.

    FIXES vs. original:
    - mutable list defaults for ``ranges``/``buffersize`` replaced by
      None sentinels (buffersize[0] = download chunk, [1] = copy chunk);
    - the result dicts contained a duplicate 'Method' key; the dead
      first entry is dropped, keeping the effective value ``httpmethod``;
    - elapsed time was computed as start - end (negative); now end - start.
    NOTE(review): ``outpath=os.getcwd()`` is evaluated at import time -
    kept for behavioral compatibility.
    """
    global geturls_download_sleep
    if ranges is None:
        ranges = [None, None]
    if buffersize is None:
        buffersize = [524288, 524288]
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Destination-file branch: download to a temp file, then move it.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        movetime = exec_time_end - exec_time_start  # FIX: was start - end
        log.info("It took "+hms_string(movetime)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(movetime), 'MoveFileTimeReadable': hms_string(movetime), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # In-memory branch: download to a temp file, copy its bytes into a
        # BytesIO buffer with progress logging, then delete the temp file.
        pretmpfilename = download_from_url_file_with_pycurl(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        copytime = exec_time_end - exec_time_start  # FIX: was start - end
        log.info("It took "+hms_string(copytime)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(copytime), 'MoveFileTimeReadable': hms_string(copytime), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Fallback used when PycURL is unavailable: delegate to the urllib
    implementation. ``ranges`` is accepted for signature compatibility
    but not forwarded (matching the original).

    FIX: mutable list defaults for ``ranges``/``buffersize`` replaced by
    None sentinels; fresh lists are built per call.
    """
    if ranges is None:
        ranges = [None, None]
    if buffersize is None:
        buffersize = [524288, 524288]
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with PycURL forced to HTTP/2 and return a 'Content'
    result dict (decoded body, sizes, headers, status, version), or
    False on socket/parse errors.

    FIX vs. original: when the User-Agent / Referer header was not already
    present, the code called ``httpuseragent.update({...})`` - mutating the
    wrong object (a string or None). It now updates ``httpheaders``.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # FIX: was httpuseragent.update(...)
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # FIX: was httpuseragent.update(...)
    if urlparts.username is not None or urlparts.password is not None:
        # Inline userinfo in the URL becomes an HTTP Basic Authorization header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)  # inter-request throttle
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    retrieved_body = BytesIO()
    retrieved_headers = BytesIO()
    try:
        if httpmethod == "GET":
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.perform()
        elif httpmethod == "POST":
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.setopt(geturls_text.POST, True)
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
        else:
            # Any other method falls back to a plain GET-style request.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_2_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.perform()
        retrieved_headers.seek(0)
        if sys.version[0] == "2":
            pycurlhead = retrieved_headers.read()
        if sys.version[0] >= "3":
            pycurlhead = retrieved_headers.read().decode('UTF-8')
        # First header line carries "HTTP/<ver> <code> <reason>".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0]
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
        retrieved_body.seek(0)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
    httpversionout = pyhttpverinfo[0]
    httpmethodout = httpmethod
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
    httpheaderout = pycurlheadersout
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
    if sys.version[0] == "2":
        # Python 2: rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the body per Content-Encoding; failures are
    # best-effort (the raw bytes are returned unchanged).
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when PycURL (HTTP/2) is unavailable.

    Delegates straight to the urllib implementation with identical
    arguments and returns its result dict unchanged.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent,
                                         httpreferer, httpcookie, httpmethod,
                                         postdata, buffersize, sleep, timeout)
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl (via the HTTP/2 PycURL fetcher) into a uniquely
    named temporary file and return a 'File' result dict, or False when
    the underlying download failed.

    FIXES vs. original:
    - mutable default argument ``ranges=[None, None]`` replaced with a
      None sentinel (``ranges`` is accepted for signature compatibility
      but is not used by this variant);
    - elapsed time was computed as start - end (always negative); now
      end - start;
    - os.path.getsize() was called three times; hoisted into one call.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    if ranges is None:
        ranges = [None, None]
    exec_time_start = time.time()
    # Derive a per-call unique temp-file suffix from URL, buffer size and
    # the start timestamp.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    pretmpfilename = download_from_url_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Set mtime/atime from the Last-Modified header.
            # NOTE(review): the later f.write() resets the mtime again -
            # kept as in the original; confirm intended ordering.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    elapsed = exec_time_end - exec_time_start  # FIX: was start - end (negative)
    log.info("It took "+hms_string(elapsed)+" to download file.")
    finalsize = os.path.getsize(tmpfilename)
    returnval.update({'Filesize': finalsize, 'FilesizeAlt': {'IEC': get_readable_size(finalsize, 2, "IEC"), 'SI': get_readable_size(finalsize, 2, "SI")}, 'DownloadTime': float(elapsed), 'DownloadTimeReadable': hms_string(elapsed)})
    return returnval
def download_from_url_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when PycURL (HTTP/2) is unavailable: delegate to the
    urllib implementation with identical arguments.

    FIX: the default for ``ranges`` was the shared mutable list
    [None, None]; use a None sentinel and build a fresh list per call.
    """
    if ranges is None:
        ranges = [None, None]
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Download httpurl (via the HTTP/2 PycURL fetcher) either to
    outpath/outfile (returns a 'File' result dict) or, when
    outfile == "-", into memory (returns a 'Content' dict).
    Returns False on failure or invalid destination paths.

    FIXES vs. original:
    - mutable list defaults for ``ranges``/``buffersize`` replaced by
      None sentinels (buffersize[0] = download chunk, [1] = copy chunk);
    - the result dicts contained a duplicate 'Method' key; the dead
      first entry is dropped, keeping the effective value ``httpmethod``;
    - elapsed time was computed as start - end (negative); now end - start.
    NOTE(review): ``outpath=os.getcwd()`` is evaluated at import time -
    kept for behavioral compatibility.
    """
    global geturls_download_sleep
    if ranges is None:
        ranges = [None, None]
    if buffersize is None:
        buffersize = [524288, 524288]
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    if not outfile == "-":
        # Destination-file branch: download to a temp file, then move it.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        movetime = exec_time_end - exec_time_start  # FIX: was start - end
        log.info("It took "+hms_string(movetime)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(movetime), 'MoveFileTimeReadable': hms_string(movetime), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # In-memory branch: download to a temp file, copy its bytes into a
        # BytesIO buffer with progress logging, then delete the temp file.
        pretmpfilename = download_from_url_file_with_pycurl2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        copytime = exec_time_end - exec_time_start  # FIX: was start - end
        log.info("It took "+hms_string(copytime)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(copytime), 'MoveFileTimeReadable': hms_string(copytime), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_to_file_with_pycurl2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=None, buffersize=None, sleep=-1, timeout=10):
    """Fallback used when PycURL (HTTP/2) is unavailable: delegate to the
    urllib implementation. ``ranges`` is accepted for signature
    compatibility but not forwarded (matching the original).

    FIX: mutable list defaults for ``ranges``/``buffersize`` replaced by
    None sentinels; fresh lists are built per call.
    """
    if ranges is None:
        ranges = [None, None]
    if buffersize is None:
        buffersize = [524288, 524288]
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep, timeout)
    return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch httpurl with PycURL forced to HTTP/3 and return a 'Content'
    result dict (decoded body, sizes, headers, status, version), or
    False on socket/parse errors.

    FIX vs. original: when the User-Agent / Referer header was not already
    present, the code called ``httpuseragent.update({...})`` - mutating the
    wrong object (a string or None). It now updates ``httpheaders``.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    if timeout <= 0:
        timeout = 10
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # FIX: was httpuseragent.update(...)
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # FIX: was httpuseragent.update(...)
    if urlparts.username is not None or urlparts.password is not None:
        # Inline userinfo in the URL becomes an HTTP Basic Authorization header.
        if sys.version[0] == "2":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password))
        if sys.version[0] >= "3":
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)  # inter-request throttle
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    retrieved_body = BytesIO()
    retrieved_headers = BytesIO()
    try:
        if httpmethod == "GET":
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.perform()
        elif httpmethod == "POST":
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.setopt(geturls_text.POST, True)
            geturls_text.setopt(geturls_text.POSTFIELDS, postdata)
            geturls_text.perform()
        else:
            # Any other method falls back to a plain GET-style request.
            geturls_text = pycurl.Curl()
            geturls_text.setopt(geturls_text.URL, httpurl)
            geturls_text.setopt(geturls_text.HTTP_VERSION, geturls_text.CURL_HTTP_VERSION_3_0)
            geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write)
            geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders)
            geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write)
            geturls_text.setopt(geturls_text.FOLLOWLOCATION, True)
            geturls_text.setopt(geturls_text.TIMEOUT, timeout)
            geturls_text.perform()
        retrieved_headers.seek(0)
        if sys.version[0] == "2":
            pycurlhead = retrieved_headers.read()
        if sys.version[0] >= "3":
            pycurlhead = retrieved_headers.read().decode('UTF-8')
        # First header line carries "HTTP/<ver> <code> <reason>".
        pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0]
        pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead)
        retrieved_body.seek(0)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    except ValueError:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE)
    httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE))
    httpversionout = pyhttpverinfo[0]
    httpmethodout = httpmethod
    httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL)
    httpheaderout = pycurlheadersout
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)))
    if sys.version[0] == "2":
        # Python 2: rebuild the header mapping into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = retrieved_body.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)
        returnval_content = strbuf.read()
    # Transparently decode the body per Content-Encoding; failures are
    # best-effort (the raw bytes are returned unchanged).
    if httpheaderout.get("Content-Encoding") == "gzip":
        try:
            returnval_content = zlib.decompress(returnval_content, 16+zlib.MAX_WBITS)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "deflate":
        try:
            returnval_content = zlib.decompress(returnval_content)
        except zlib.error:
            pass
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        try:
            returnval_content = brotli.decompress(returnval_content)
        except brotli.error:
            pass
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fallback used when PycURL (HTTP/3) is unavailable.

    Delegates straight to the urllib implementation with identical
    arguments and returns its result dict unchanged.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent,
                                         httpreferer, httpcookie, httpmethod,
                                         postdata, buffersize, sleep, timeout)
def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download httpurl into a uniquely named temporary file (PycURL fallback).

    The transfer itself is done by download_from_url_with_pycurl3; the body is
    then written to a NamedTemporaryFile whose suffix is derived from a SHA-1
    of the URL, buffer size and start time.  Returns a metadata dict
    (Type "File") or False on failure.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Build a unique temp-file suffix; hashlib needs bytes on Python 3.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified header onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # Older Pythons lack parsedate_to_datetime; parse the RFC-1123 date by hand.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # header missing or malformed; keep the file's own mtime
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # fix: duration must be end - start (the original start - end is negative)
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """PycURL-unavailable fallback: delegate the file download to urllib."""
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize, sleep, timeout)
    # fix: the delegated result was computed but never returned to the caller
    return returnval
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download httpurl to outpath/outfile, or (when outfile is "-") return the body in-memory.

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns a metadata dict (Type "File" or "Content") or False on failure.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Real destination: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a plain file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name is already taken by a directory
        pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Mirror the server's Last-Modified header onto the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass  # header missing or malformed; keep the file's own mtime
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # fix: duration must be end - start (the original start - end is negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # fix: the dict literal had two 'Method' keys; the second (httpmethod)
        # silently won, so keep that one entry only.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
    # outfile == "-": download to a temp file, then stream its bytes back.
    pretmpfilename = download_from_url_file_with_pycurl3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if not pretmpfilename:
        return False
    tmpfilename = pretmpfilename['Filename']
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with BytesIO() as f:
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                else:
                    # fix: guard against ZeroDivisionError on empty files
                    percentage = "UNKNOWN"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    # fix: single 'Method' key (duplicate removed, last-written value kept)
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_to_file_with_pycurl3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """PycURL-unavailable fallback: delegate to the urllib to-file implementation."""
    # fix: the original passed buffersize/outfile/outpath positionally in the
    # wrong slots of the urllib signature (and dropped ranges); use keywords so
    # every argument lands on the right parameter, and return the result.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, ranges=ranges, buffersize=buffersize, sleep=sleep, timeout=timeout)
    return returnval
def download_file_from_ftp_file(url):
    """Fetch the file named by an ftp:// or ftps:// URL into a BytesIO.

    Returns the rewound BytesIO on success, False on failure (bad scheme,
    DNS error, or connect timeout).
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    # Fall back to anonymous FTP credentials when the URL carries none.
    if urlparts.username is not None:
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if urlparts.password is not None:
        ftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if urlparts.scheme == "ftp":
        ftp = FTP()
    elif urlparts.scheme == "ftps":
        ftp = FTP_TLS()
    else:
        return False
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    ftp_port = urlparts.port
    if urlparts.port is None:
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    except socket.timeout:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    # fix: log in with the computed credentials so the anonymous fallback is used
    ftp.login(ftp_username, ftp_password)
    if urlparts.scheme == "ftps":
        ftp.prot_p()  # switch the data connection to TLS
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)  # rewind so the caller can read from the start
    return ftpfile
def download_file_from_ftp_string(url):
    """Return the contents of an ftp:// / ftps:// URL as bytes, or False on failure."""
    ftpfile = download_file_from_ftp_file(url)
    # fix: the downloader returns False on failure; calling .read() on it crashed
    if not ftpfile:
        return False
    return ftpfile.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch an ftp:// / ftps:// URL fully into memory.

    HTTP-style headers are normalized for interface parity with the HTTP
    downloaders but are not sent over FTP.  Returns a metadata dict
    (Type "Content") or False on failure.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # fix: was httpuseragent.update(...) — .update called on a string;
            # the header belongs in httpheaders
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # fix: same misdirected update for the Referer header
            httpheaders.update({'Referer': httpreferer})
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_ftp_file(httpurl)
    if not geturls_text:
        return False
    downloadsize = None  # FTP path here provides no size ahead of time
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            else:
                # fix: downloadsize is 0 here, so the unguarded percentage
                # computation raised ZeroDivisionError on the first chunk
                percentage = "UNKNOWN"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # rewind before read(), otherwise the content comes back empty
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an ftp:// / ftps:// URL into a uniquely named temporary file.

    Delegates the transfer to download_from_url_with_ftp and writes the body
    to a NamedTemporaryFile.  Returns a metadata dict (Type "File") or False.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Build a unique temp-file suffix; hashlib needs bytes on Python 3.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # FTP results carry Headers=None, so this normally falls through
            # to the AttributeError handler and leaves the mtime untouched.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # fix: duration must be end - start (the original start - end is negative)
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an ftp:// / ftps:// URL to outpath/outfile, or in-memory when outfile is "-".

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns a metadata dict or False on failure.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Real destination: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a plain file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name is already taken by a directory
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # fix: duration must be end - start (the original start - end is negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # fix: the dict literal had two 'Method' keys ('Method': None won);
        # keep a single entry with the effective value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
    # outfile == "-": download to a temp file, then stream its bytes back.
    pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if not pretmpfilename:
        return False
    tmpfilename = pretmpfilename['Filename']
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with BytesIO() as f:
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                else:
                    # fix: guard against ZeroDivisionError on empty files
                    percentage = "UNKNOWN"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    # fix: single 'Method' key (duplicate removed, effective value None kept)
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to the path of an ftp:// / ftps:// URL.

    Returns the rewound file object on success, False on failure.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    # Fall back to anonymous FTP credentials when the URL carries none.
    if urlparts.username is not None:
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if urlparts.password is not None:
        ftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if urlparts.scheme == "ftp":
        ftp = FTP()
    elif urlparts.scheme == "ftps":
        ftp = FTP_TLS()
    else:
        return False
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    ftp_port = urlparts.port
    if urlparts.port is None:
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    except socket.timeout:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    # fix: log in with the computed credentials so the anonymous fallback is used
    ftp.login(ftp_username, ftp_password)
    if urlparts.scheme == "ftps":
        ftp.prot_p()  # switch the data connection to TLS
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)  # rewind so the caller can reuse the object
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload an in-memory byte string to an ftp:// / ftps:// URL.

    Returns whatever upload_file_to_ftp_file returns (the uploaded file
    object, or False on failure).
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    # fix: the result was computed but never returned to the caller
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch the file named by an sftp:// URL into a BytesIO using paramiko.

    Returns the rewound BytesIO on success, False on failure (bad scheme,
    SSH/DNS error, or connect timeout).
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    sftp_port = urlparts.port
    if urlparts.port is None:
        sftp_port = 22  # default SSH port
    else:
        sftp_port = urlparts.port
    # Fall back to anonymous-style credentials when the URL carries none.
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # fix: connect with the computed credentials so the fallback is used
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    except socket.timeout:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)  # rewind so the caller can read from the start
    return sftpfile
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        """Stub used when paramiko is not installed; always reports failure."""
        # NOTE(review): the guard line was missing from the mangled source;
        # without it this stub would unconditionally shadow the real downloader.
        return False
def download_file_from_sftp_string(url):
    """Return the contents of an sftp:// URL as bytes, or False on failure."""
    sftpfile = download_file_from_sftp_file(url)
    # fix: the downloader returns False on failure; calling .read() on it crashed
    if not sftpfile:
        return False
    return sftpfile.read()
if(not haveparamiko):
    def download_file_from_sftp_string(url):
        """Stub used when paramiko is not installed; always reports failure."""
        # fix(review): the source spelled this 'download_file_from_ftp_string',
        # which would have replaced the working FTP downloader with a stub;
        # by position (between the sftp helpers) the sftp name is intended.
        return False
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Fetch an sftp:// URL fully into memory (paramiko backend).

    HTTP-style headers are normalized for interface parity but not sent over
    SFTP.  Returns a metadata dict (Type "Content") or False on failure.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # fix: was httpuseragent.update(...) — .update called on a string;
            # the header belongs in httpheaders
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # fix: same misdirected update for the Referer header
            httpheaders.update({'Referer': httpreferer})
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_text = download_file_from_sftp_file(httpurl)
    if not geturls_text:
        return False
    downloadsize = None  # SFTP path here provides no size ahead of time
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            else:
                # fix: downloadsize is 0 here, so the unguarded percentage
                # computation raised ZeroDivisionError on the first chunk
                percentage = "UNKNOWN"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            strbuf.write(databytes)
        strbuf.seek(0)  # rewind before read(), otherwise the content comes back empty
        returnval_content = strbuf.read()
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
        """Stub used when paramiko is not installed; always reports failure."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// URL into a uniquely named temporary file.

    Delegates the transfer to download_from_url_with_sftp and writes the body
    to a NamedTemporaryFile.  Returns a metadata dict (Type "File") or False.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Build a unique temp-file suffix; hashlib needs bytes on Python 3.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    pretmpfilename = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, timeout)
    if not pretmpfilename:
        return False
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # SFTP results carry Headers=None, so this normally falls through
            # to the AttributeError handler and leaves the mtime untouched.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')}
        f.write(pretmpfilename['Content'])
    exec_time_end = time.time()
    # fix: duration must be end - start (the original start - end is negative)
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
        """Stub used when paramiko is not installed; always reports failure."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an sftp:// URL to outpath/outfile, or in-memory when outfile is "-".

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns a metadata dict or False on failure.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Real destination: download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a plain file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name is already taken by a directory
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = int(os.path.getsize(tmpfilename))
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # fix: duration must be end - start (the original start - end is negative)
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # fix: the dict literal had two 'Method' keys ('Method': None won);
        # keep a single entry with the effective value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
        return returnval
    # outfile == "-": download to a temp file, then stream its bytes back.
    pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout)
    if not pretmpfilename:
        return False
    tmpfilename = pretmpfilename['Filename']
    downloadsize = int(os.path.getsize(tmpfilename))
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with BytesIO() as f:
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                else:
                    # fix: guard against ZeroDivisionError on empty files
                    percentage = "UNKNOWN"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
    # fix: single 'Method' key (duplicate removed, effective value None kept)
    returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
        """Stub used when paramiko is not installed; always reports failure."""
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path of an sftp:// URL (paramiko).

    Returns the rewound file object on success, False on failure.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if urlparts.scheme == "http" or urlparts.scheme == "https":
        return False
    if urlparts.port is None:
        sftp_port = 22  # default SSH port
    else:
        sftp_port = urlparts.port
    # Fall back to anonymous-style credentials when the URL carries none.
    if urlparts.username is not None:
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if urlparts.password is not None:
        sftp_password = urlparts.password
    elif urlparts.password is None and urlparts.username == "anonymous":
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if urlparts.scheme != "sftp":
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # fix: connect with the computed credentials so the fallback is used
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    except socket.timeout:
        log.info("Error With URL "+url)  # fix: was httpurl, undefined in this function
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)  # rewind so the caller can reuse the object
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Stub for the sftp:// uploader when paramiko is unavailable.

    NOTE(review): the stub body is not visible in this chunk; the file's
    pattern for unavailable backends is to return False — confirm.
    """
def upload_file_to_sftp_string(sftpstring, url):
    """Upload an in-memory byte string to an sftp:// URL via paramiko.

    Wraps ``sftpstring`` in a BytesIO and delegates to
    upload_file_to_sftp_file(); returns that helper's result (the rewound
    file object on success, False on failure).
    """
    sftpfileo = BytesIO(sftpstring);
    # BUG FIX: the original called the undefined name
    # ``upload_file_to_sftp_files`` with the undefined variable ``ftpfileo``;
    # both were typos for the helper and buffer defined here.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
def upload_file_to_sftp_string(url):
    """Stub for the sftp:// string uploader when paramiko is unavailable.

    NOTE(review): this stub takes only ``url`` while the real implementation
    takes ``(sftpstring, url)`` — callers passing two arguments will get a
    TypeError from this fallback; confirm and align the signatures.
    NOTE(review): body not visible in this chunk; presumably returns False.
    """
def download_file_from_pysftp_file(url):
    """Download the file at an sftp:// URL into a BytesIO via pysftp.

    Parameters:
      url -- sftp:// URL whose path names the remote file.

    Returns a rewound BytesIO holding the file contents, or False on any
    failure (wrong scheme, connection error, DNS failure, timeout).
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH/SFTP port when the URL omits one
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUG FIX: the original discarded the pysftp.Connection return value
        # and then called ``ssh.open_sftp()`` on the undefined name ``ssh``.
        # pysftp.Connection already IS the SFTP session object.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUG FIX: the original logged the undefined name ``httpurl`` here
        # (this function's parameter is ``url``), raising NameError.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
def download_file_from_pysftp_file(url):
    """Stub for the pysftp file downloader when pysftp is unavailable.

    NOTE(review): the stub body is not visible in this chunk; the file's
    pattern for unavailable backends is to return False — confirm.
    """
def download_file_from_pysftp_string(url):
    """Download the file at an sftp:// URL and return its raw bytes.

    Returns False when the underlying download failed.
    """
    sftpfile = download_file_from_pysftp_file(url);
    # BUG FIX: the helper returns False on failure; the original called
    # ``.read()`` on it unconditionally, raising AttributeError on bool.
    if(not sftpfile):
        return False;
    return sftpfile.read();
def download_file_from_ftp_string(url):
    """Stub downloader defined in the pysftp fallback section.

    NOTE(review): the name says ``ftp`` but this sits among the pysftp
    fallbacks — it looks like a copy-paste of the FTP stub where
    ``download_file_from_pysftp_string`` was intended; without it, that name
    is undefined when pysftp is missing. Confirm before renaming, since the
    name is part of the module's public surface.
    NOTE(review): body not visible in this chunk; presumably returns False.
    """
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// URL into memory (pysftp backend).

    Returns a result dict with 'Type': "Content" and the raw bytes under
    'Content' (interface matches the other download_from_url_with_* helpers;
    HTTP-only fields are None), or False when the transfer failed.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    # Normalize the header argument the same way the HTTP backends do, even
    # though SFTP sends no headers.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        return False;
    # SFTP exposes no Content-Length up front; the size stays 0 and the true
    # total is accumulated while reading.
    downloadsize = None;
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with BytesIO() as strbuf:
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            # BUG FIX: guard the percentage computation — downloadsize is 0
            # here, and the unguarded division raised ZeroDivisionError on
            # the very first chunk.
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            strbuf.write(databytes);
        # BUG FIX: rewind before reading back, otherwise read() returns b"".
        strbuf.seek(0, 0);
        returnval_content = strbuf.read();
    returnval = {'Type': "Content", 'Content': returnval_content, 'Contentsize': fulldatasize, 'ContentsizeAlt': {'IEC': get_readable_size(fulldatasize, 2, "IEC"), 'SI': get_readable_size(fulldatasize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1, timeout=10):
    """Stub for the pysftp in-memory downloader when pysftp is unavailable.

    NOTE(review): the stub body is not visible in this chunk; the file's
    pattern for unavailable backends is to return False — confirm.
    """
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Download an sftp:// URL to a named temporary file (pysftp backend).

    Returns a result dict with 'Type': "File" and 'Filename' set to the
    temporary file path (interface matches the other *_file_with_* helpers),
    or False on failure.  The caller is responsible for deleting the file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from URL + buffer size + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    # BUG FIX: the original passed httpuseragent/httpreferer here — neither
    # is a parameter of this function (NameError) nor accepted by
    # download_from_url_with_pysftp.
    pretmpfilename = download_from_url_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep, timeout);
    if(not pretmpfilename):
        return False;
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        # Best-effort: stamp the temp file with the server's Last-Modified
        # header.  The pysftp result carries 'Headers': None, so this
        # normally falls through; the broadened except clauses keep the
        # None/.get chain from crashing (the original fallback could raise
        # an uncaught TypeError on a None header value).
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except (AttributeError, TypeError, ValueError):
                pass;
        except (TypeError, ValueError):
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': pretmpfilename.get('Contentsize'), 'FilesizeAlt': {'IEC': get_readable_size(pretmpfilename.get('Contentsize'), 2, "IEC"), 'SI': get_readable_size(pretmpfilename.get('Contentsize'), 2, "SI")}, 'Headers': pretmpfilename.get('Headers'), 'Version': pretmpfilename.get('Version'), 'Method': pretmpfilename.get('Method'), 'HeadersSent': pretmpfilename.get('HeadersSent'), 'URL': pretmpfilename.get('URL'), 'Code': pretmpfilename.get('Code'), 'Reason': pretmpfilename.get('Reason')};
        f.write(pretmpfilename['Content']);
    exec_time_end = time.time();
    # NOTE(review): start - end is negative; hms_string presumably normalizes
    # the sign (the same pattern is used throughout this file) — confirm.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'FilesizeAlt': {'IEC': get_readable_size(os.path.getsize(tmpfilename), 2, "IEC"), 'SI': get_readable_size(os.path.getsize(tmpfilename), 2, "SI")}, 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    """Stub for the pysftp to-temp-file downloader when pysftp is unavailable.

    NOTE(review): the stub body is not visible in this chunk; the file's
    pattern for unavailable backends is to return False — confirm.
    """
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Download an sftp:// URL to disk, or into memory when outfile is "-".

    When outfile != "-": the payload is moved to outpath/outfile and a
    'Type': "File" dict is returned.  When outfile == "-": the temp file is
    read back and a 'Type': "Content" dict with the raw bytes is returned.
    Returns False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        # Download to a temp file, then move it into place.
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;  # outpath exists but is a file, not a directory
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;  # destination name is taken by a directory
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # NOTE(review): start - end is negative; hms_string presumably
        # normalizes the sign (pattern used file-wide) — confirm.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the original dict listed 'Method' twice ('Method':
        # pretmpfilename['Method'] then 'Method': None); the later duplicate
        # silently forced Method to None.  The real value is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        # Download to a temp file, stream it back into memory, then delete it.
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, ranges, buffersize[0], sleep, timeout);
        # ROBUSTNESS: the original second branch had no failure guard and
        # would subscript False.
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = int(os.path.getsize(tmpfilename));
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                # Guard the percentage math against a zero total size.
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0, 0);
            fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        # BUG FIX: same duplicate-'Method'-key defect as the branch above.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    """Stub for the pysftp to-file downloader when pysftp is unavailable.

    NOTE(review): the stub body is not visible in this chunk; the file's
    pattern for unavailable backends is to return False — confirm.
    """
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload a seekable binary file object to an sftp:// URL via pysftp.

    Parameters:
      sftpfile -- file-like object open for reading; rewound before return.
      url      -- sftp:// URL whose path names the remote destination.

    Returns the rewound ``sftpfile`` on success, False on any failure
    (wrong scheme, connection error, DNS failure, timeout).
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH/SFTP port when the URL omits one
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUG FIX: the original discarded the pysftp.Connection return value
        # and later called ``ssh.open_sftp()`` on the undefined name ``ssh``.
        # pysftp.Connection already IS the SFTP session object.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUG FIX: the original logged the undefined name ``httpurl`` here
        # (this function's parameter is ``url``), raising NameError.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;
def upload_file_to_pysftp_file(sftpfile, url):
    """Stub for the pysftp uploader when pysftp is unavailable.

    NOTE(review): the stub body is not visible in this chunk; the file's
    pattern for unavailable backends is to return False — confirm.
    """
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload an in-memory byte string to an sftp:// URL via pysftp.

    Wraps ``sftpstring`` in a BytesIO and delegates to
    upload_file_to_pysftp_file(); returns that helper's result (the rewound
    file object on success, False on failure).
    """
    sftpfileo = BytesIO(sftpstring);
    # BUG FIX: the original called the undefined name
    # ``upload_file_to_pysftp_files`` with the undefined variable
    # ``ftpfileo``; both were typos for the helper and buffer defined here.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
def upload_file_to_pysftp_string(url):
    """Stub for the pysftp string uploader when pysftp is unavailable.

    NOTE(review): this stub takes only ``url`` while the real implementation
    takes ``(sftpstring, url)`` — callers passing two arguments will get a
    TypeError from this fallback; confirm and align the signatures.
    NOTE(review): body not visible in this chunk; presumably returns False.
    """