4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwgetold.py - Last Update: 9/24/2023 Ver. 1.5.0 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
61 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
89 if(sys
.version
[0]=="2"):
91 from io
import StringIO
, BytesIO
;
94 from cStringIO
import StringIO
;
95 from cStringIO
import StringIO
as BytesIO
;
97 from StringIO
import StringIO
;
98 from StringIO
import StringIO
as BytesIO
;
99 # From http://python-future.org/compatible_idioms.html
100 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
101 from urllib
import urlencode
;
102 from urllib
import urlopen
as urlopenalt
;
103 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
104 import urlparse
, cookielib
;
105 from httplib
import HTTPConnection
, HTTPSConnection
;
106 if(sys
.version
[0]>="3"):
107 from io
import StringIO
, BytesIO
;
108 # From http://python-future.org/compatible_idioms.html
109 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
110 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
111 from urllib
.error
import HTTPError
, URLError
;
112 import urllib
.parse
as urlparse
;
113 import http
.cookiejar
as cookielib
;
114 from http
.client
import HTTPConnection
, HTTPSConnection
;
# ---------------------------------------------------------------------------
# Module-level metadata, capability probes and default HTTP header tables.
# NOTE(review): reconstructed from a garbled dump. The brotli probe and the
# 32/64-bit normalization bodies were missing and are re-created from the
# visible conditionals — confirm against upstream pywwwgetold.py.
# ---------------------------------------------------------------------------

__program_name__ = "PyWWW-Get"
__program_alt_name__ = "PyWWWGet"
__program_small_name__ = "wwwget"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, micro, tag, tag-number)
__version_info__ = (1, 5, 0, "RC 1", 1)
# (year, month, day, tag, tag-number)
__version_date_info__ = (2023, 9, 24, "RC 1", 1)
__version_date__ = str(__version_date_info__[0]) + "." + str(
    __version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__ + "-" + str(__version_date_info__[4])
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0]) + "." + str(__version_info__[1]) + \
        "." + str(__version_info__[2]) + " " + str(__version_info__[3])
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0]) + "." + \
        str(__version_info__[1]) + "." + str(__version_info__[2])

# Prefix used when creating temporary download files, e.g. "py3wwwget1-".
tmpfileprefix = "py" + str(sys.version_info[0]) + \
    __program_small_name__ + str(__version_info__[0]) + "-"
pytempdir = tempfile.gettempdir()

# BUGFIX(review): platform.architecture() returns a tuple like
# ('64bit', 'ELF'); the original compared the tuple itself to "32bit"/"64bit"
# so PyBitness never matched.  Compare the first element instead.
PyBitness = platform.architecture()
if(PyBitness[0] == "32bit" or PyBitness[0] == "32"):
    PyBitness = "32"
elif(PyBitness[0] == "64bit" or PyBitness[0] == "64"):
    PyBitness = "64"
else:
    PyBitness = "32"

# Probe for brotli support; advertise "br" only when the module is present.
# NOTE(review): this probe was missing from the dump and is reconstructed.
try:
    import brotli
    havebrotli = True
except ImportError:
    havebrotli = False
if(havebrotli):
    compression_supported = "gzip, deflate, br"
else:
    compression_supported = "gzip, deflate"

# Shared cookie jar used as the default for all download helpers.
geturls_cj = cookielib.CookieJar()

# Windows user-agent fragments and their client-hint (Sec-CH-UA-*) addons.
# BUGFIX(review): the addon dicts each contained 'SEC-CH-UA-PLATFORM' twice,
# so the "Windows" value was silently clobbered by the version string; the
# second key is now the correct 'SEC-CH-UA-PLATFORM-VERSION'.
windowsNT4_ua_string = "Windows NT 4.0"
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                       'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"}
windows2k_ua_string = "Windows NT 5.0"
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                      'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"}
windowsXP_ua_string = "Windows NT 5.1"
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                      'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"}
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64"
# NOTE(review): upstream advertises "5.1.0" here although the UA string says
# NT 5.2 — preserved as-is.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                        'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"}
windows7_ua_string = "Windows NT 6.1; Win64; x64"
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                     'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"}
windows8_ua_string = "Windows NT 6.2; Win64; x64"
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                     'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"}
windows81_ua_string = "Windows NT 6.3; Win64; x64"
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                      'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"}
windows10_ua_string = "Windows NT 10.0; Win64; x64"
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                      'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"}
windows11_ua_string = "Windows NT 11.0; Win64; x64"
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86",
                      'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"}

# Canned User-Agent strings, all pretending to run on Windows 7 x64.
geturls_ua_firefox_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    "; rv:109.0) Gecko/20100101 Firefox/117.0"
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    "; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17"
geturls_ua_chrome_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
geturls_ua_chromium_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    ") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36"
geturls_ua_palemoon_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    "; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1"
geturls_ua_opera_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0"
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48"
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    "; Trident/7.0; rv:11.0) like Gecko"
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 (" + windows7_ua_string + \
    ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31"
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(
    proname=__project__, prover=__version__, prourl=__project_url__)
if(platform.python_implementation() != ""):
    py_implementation = platform.python_implementation()
if(platform.python_implementation() == ""):
    py_implementation = "Python"
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(
    osver=platform.system() + " " + platform.release(), archtype=platform.machine(),
    prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(),
    proname=__project__, prover=__version__)
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
geturls_ua = geturls_ua_firefox_windows7

# Default request-header sets, one per impersonated browser.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
                                    'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
                                      'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                   'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"}
geturls_headers_chrome_windows7.update(windows7_ua_addon)
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                     'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"}
geturls_headers_chromium_windows7.update(windows7_ua_addon)
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
                                     'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                  'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"}
geturls_headers_opera_windows7.update(windows7_ua_addon)
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                    'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"}
geturls_headers_vivaldi_windows7.update(windows7_ua_addon)
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
                                              'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                           'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon)
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                   'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close",
                                   'SEC-CH-UA': "\"" + __project__ + "\";v=\"" + str(__version__) + "\", \"Not;A=Brand\";v=\"8\", \"" + py_implementation + "\";v=\"" + str(platform.release()) + "\"",
                                   'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(),
                                   'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)}
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7",
                                       'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close",
                                       'SEC-CH-UA': "\"" + __project__ + "\";v=\"" + str(__version__) + "\", \"Not;A=Brand\";v=\"8\", \"" + py_implementation + "\";v=\"" + str(platform.release()) + "\"",
                                       'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(),
                                       'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)}
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
                                    'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6",
                                        'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}
geturls_headers = geturls_headers_firefox_windows7
# Default pause (seconds) between downloads; 0 disables throttling.
geturls_download_sleep = 0
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit a debug/status message through the selected channel.

    dbgtxt: text to emit.
    outtype: "print", "log" (info), "warning", "error", "critical",
             "exception", "logalt" (uses dgblevel) or "debug".
    dbgenable: when False, the message is suppressed.
    dgblevel: numeric logging level, used only by outtype=="logalt".
    Returns True when the message was emitted or deliberately suppressed,
    False for an unrecognized outtype.

    NOTE(review): the per-branch return statements were missing from the
    dump; returning True on success / False on unknown outtype is
    reconstructed — confirm against upstream.
    """
    if(not dbgenable):
        # Suppression is treated as success.
        return True
    if(outtype == "print"):
        print(dbgtxt)
    elif(outtype == "log"):
        logging.info(dbgtxt)
    elif(outtype == "warning"):
        logging.warning(dbgtxt)
    elif(outtype == "error"):
        logging.error(dbgtxt)
    elif(outtype == "critical"):
        logging.critical(dbgtxt)
    elif(outtype == "exception"):
        logging.exception(dbgtxt)
    elif(outtype == "logalt"):
        logging.log(dgblevel, dbgtxt)
    elif(outtype == "debug"):
        logging.debug(dbgtxt)
    else:
        return False
    return True
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit *dbgtxt* via verbose_printout() and hand the text back.

    Returns dbgtxt on success, False when verbose_printout() failed.
    NOTE(review): the tail of this function was missing from the dump;
    the return logic is reconstructed — confirm against upstream.
    """
    dbgout = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel)
    if(not dbgout):
        return False
    return dbgtxt
def add_url_param(url, **params):
    """Return *url* with **params merged into its query string.

    Existing query parameters are preserved (and overridden by a param of
    the same name); all other URL components are left untouched.
    """
    n = 3  # index of the query component in the urlsplit() 5-tuple
    parts = list(urlparse.urlsplit(url))
    # BUGFIX(review): the original used cgi.parse_qsl, which was removed
    # from the cgi module long ago (and the whole module is gone in 3.13).
    # urlparse.parse_qsl exists in both Python 2's urlparse module and
    # Python 3's urllib.parse alias used by this file.
    d = dict(urlparse.parse_qsl(parts[n]))  # use parse_qs for list values
    d.update(params)
    parts[n] = urlencode(d)
    return urlparse.urlunsplit(parts)
# Make the script's own directory and the current working directory
# reachable through PATH (used by which_exec() below).
os.environ["PATH"] = os.pathsep.join([
    os.environ["PATH"],
    os.path.dirname(os.path.realpath(__file__)),
    os.getcwd(),
])
257 def which_exec(execfile):
258 for path
in os
.environ
["PATH"].split(":"):
259 if os
.path
.exists(path
+ "/" + execfile):
260 return path
+ "/" + execfile;
def listize(varlist):
    """Build 1-based forward and reverse lookup tables for *varlist*.

    Returns {1: reg, 2: rev, 'reg': reg, 'rev': rev} where reg maps
    position -> value and rev maps value -> position (positions start at 1).
    NOTE(review): the loop bookkeeping was missing from the dump and is
    reconstructed from the visible update() calls.
    """
    newlistreg = {}
    newlistrev = {}
    for ilx, value in enumerate(varlist, 1):
        newlistreg[ilx] = value
        newlistrev[value] = ilx
    newlistfull = {1: newlistreg, 2: newlistrev,
                   'reg': newlistreg, 'rev': newlistrev}
    return newlistfull
def twolistize(varlist):
    """Build lookup tables for a list of (name, desc) pairs.

    Each field is whitespace-stripped.  Returns
    {1: nametables, 2: desctables, 'name': nametables, 'desc': desctables}
    where each sub-table has the {1/'reg': pos->value, 2/'rev': value->pos}
    shape produced by listize().
    NOTE(review): loop bookkeeping reconstructed from the visible
    update() calls — confirm against upstream.
    """
    newlistnamereg = {}
    newlistnamerev = {}
    newlistdescreg = {}
    newlistdescrev = {}
    for ilx, pair in enumerate(varlist, 1):
        name = pair[0].strip()
        desc = pair[1].strip()
        newlistnamereg[ilx] = name
        newlistnamerev[name] = ilx
        newlistdescreg[ilx] = desc
        newlistdescrev[desc] = ilx
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev,
                      'reg': newlistnamereg, 'rev': newlistnamerev}
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev,
                      'reg': newlistdescreg, 'rev': newlistdescrev}
    newlistfull = {1: newlistnametmp, 2: newlistdesctmp,
                   'name': newlistnametmp, 'desc': newlistdesctmp}
    return newlistfull
def arglistize(proexec, *varlist):
    """Flatten (flag, value) pairs into an argv list starting with *proexec*.

    A None in either position of a pair is skipped, so ("-a", None) adds
    just the flag and (None, "v") just the value.
    NOTE(review): loop bookkeeping reconstructed from the visible
    append() calls — confirm against upstream.
    """
    newarglist = [proexec]
    for pair in varlist:
        if pair[0] is not None:
            newarglist.append(pair[0])
        if pair[1] is not None:
            newarglist.append(pair[1])
    return newarglist
def fix_header_names(header_dict):
    """Normalize HTTP header names to Title-Case.

    e.g. {"content-type": v} -> {"Content-Type": v}; values are untouched.
    NOTE(review): the return statement was missing from the dump and is
    reconstructed — without it the function would return None.
    """
    header_dict = {k.title(): v for k, v in header_dict.items()}
    return header_dict
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as "H:MM:SS.ss"."""
    whole_hours = int(sec_elapsed / (60 * 60))
    leftover = sec_elapsed % (60 * 60)
    whole_minutes = int(leftover / 60)
    seconds_part = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(whole_hours, whole_minutes, seconds_part)
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size.

    bytes: the count to convert (the original parameter name shadows the
           builtin; kept for interface compatibility).
    precision: decimal places in the formatted value.
    unit: "IEC" (KiB, 1024-based) or "SI" (kB, 1000-based); anything else
          falls back to "IEC".
    Returns {'Bytes': original count, 'ReadableWithSuffix': "1.5 KiB",
             'ReadableWithoutSuffix': "1.5", 'ReadableSuffix': "KiB"}.
    NOTE(review): the loop/return plumbing was missing from the dump and is
    reconstructed around the visible formatting code — confirm upstream.
    """
    unit = unit.upper()
    if(unit != "IEC" and unit != "SI"):
        unit = "IEC"
    if(unit == "IEC"):
        units = [" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB"]
        unitswos = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"]
        unitsize = 1024.0
    if(unit == "SI"):
        units = [" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB"]
        unitswos = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB"]
        unitsize = 1000.0
    return_val = {}
    orgbytes = bytes
    for unit in units:  # note: reuses 'unit' as the suffix, per upstream
        if abs(bytes) < unitsize:
            strformat = "%3." + str(precision) + "f%s"
            pre_return_val = (strformat % (bytes, unit))
            # Strip an all-zero fraction: "512.0 B" -> "512 B".
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
            alt_return_val = pre_return_val.split()
            return_val = {'Bytes': orgbytes,
                          'ReadableWithSuffix': pre_return_val,
                          'ReadableWithoutSuffix': alt_return_val[0],
                          'ReadableSuffix': alt_return_val[1]}
            return return_val
        bytes = bytes / unitsize
    # Larger than the biggest listed unit: report in YiB.
    strformat = "%." + str(precision) + "f%s"
    pre_return_val = (strformat % (bytes, "YiB"))
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
    alt_return_val = pre_return_val.split()
    return_val = {'Bytes': orgbytes,
                  'ReadableWithSuffix': pre_return_val,
                  'ReadableWithoutSuffix': alt_return_val[0],
                  'ReadableSuffix': alt_return_val[1]}
    return return_val
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Size (and optionally hash) the file at path *infile*.

    Returns the dict from get_readable_size() for the file's size; when
    usehashes is True, one "MD5"/"SHA1"/... key per entry in the
    comma-separated *usehashtypes* is added with the hex digest.
    NOTE(review): loop bookkeeping and the explicit file close were
    missing from the dump and are reconstructed.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = os.path.getsize(infile)
    return_val = get_readable_size(getfilesize, precision, unit)
    if(usehashes):
        hashtypelist = usehashtypes.split(",")
        openfile = open(infile, "rb")
        filecontents = openfile.read()
        openfile.close()
        listnumcount = 0
        listnumend = len(hashtypelist)
        while(listnumcount < listnumend):
            hashtypelistlow = hashtypelist[listnumcount].strip()
            hashtypelistup = hashtypelistlow.upper()
            filehash = hashlib.new(hashtypelistup)
            filehash.update(filecontents)
            filegethash = filehash.hexdigest()
            return_val.update({hashtypelistup: filegethash})
            listnumcount += 1
    return return_val
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Size (and optionally hash) the string *instring*.

    Like get_readable_size_from_file() but measures len(instring); on
    Python 3 the string is UTF-8 encoded before hashing.
    NOTE(review): loop bookkeeping was missing from the dump and is
    reconstructed.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = len(instring)
    return_val = get_readable_size(getfilesize, precision, unit)
    if(usehashes):
        hashtypelist = usehashtypes.split(",")
        listnumcount = 0
        listnumend = len(hashtypelist)
        while(listnumcount < listnumend):
            hashtypelistlow = hashtypelist[listnumcount].strip()
            hashtypelistup = hashtypelistlow.upper()
            filehash = hashlib.new(hashtypelistup)
            if(sys.version[0] == "2"):
                filehash.update(instring)
            if(sys.version[0] >= "3"):
                filehash.update(instring.encode('utf-8'))
            filegethash = filehash.hexdigest()
            return_val.update({hashtypelistup: filegethash})
            listnumcount += 1
    return return_val
def http_status_to_reason(code):
    """Map an HTTP status code to its standard reason phrase.

    Returns 'Unknown Status Code' for anything not in the table.
    NOTE(review): entries missing from the garbled dump were restored
    from the standard IANA/RFC 9110 reason phrases.
    """
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    }
    return reasons.get(code, 'Unknown Status Code')
def ftp_status_to_reason(code):
    """Map an FTP reply code to its standard reply text (RFC 959).

    Returns 'Unknown Status Code' for anything not in the table.
    NOTE(review): entries missing from the garbled dump were restored
    from the RFC 959 reply-code text.
    """
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    }
    return reasons.get(code, 'Unknown Status Code')
def sftp_status_to_reason(code):
    """Map an SFTP (SSH_FXP_STATUS) code to its SSH_FX_* name.

    Returns 'Unknown Status Code' for anything not in the table.
    NOTE(review): entries 0, 1 and 4 were missing from the dump and were
    restored from the SFTP protocol draft's status-code list.
    """
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    }
    return reasons.get(code, 'Unknown Status Code')
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of (name, value) tuples.

    A list passes through unchanged; any other type yields False.
    NOTE(review): the list/else branches and final return were missing
    from the dump and are reconstructed.
    """
    if isinstance(headers, dict):
        returnval = []
        if(sys.version[0] == "2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append((headkey, headvalue))
        if(sys.version[0] >= "3"):
            for headkey, headvalue in headers.items():
                returnval.append((headkey, headvalue))
    elif isinstance(headers, list):
        returnval = headers
    else:
        returnval = False
    return returnval
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to pycurl-style "Name: value" strings.

    A list passes through unchanged; any other type yields False.
    NOTE(review): the list/else branches and final return were missing
    from the dump and are reconstructed.
    """
    if isinstance(headers, dict):
        returnval = []
        if(sys.version[0] == "2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append(headkey + ": " + headvalue)
        if(sys.version[0] >= "3"):
            for headkey, headvalue in headers.items():
                returnval.append(headkey + ": " + headvalue)
    elif isinstance(headers, list):
        returnval = headers
    else:
        returnval = False
    return returnval
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) tuples into a header dict.

    A dict passes through unchanged; any other type yields False.
    NOTE(review): loop bookkeeping and the dict/else branches were
    missing from the dump and are reconstructed.
    """
    if isinstance(headers, list):
        returnval = {}
        mli = 0
        mlil = len(headers)
        while(mli < mlil):
            returnval.update({headers[mli][0]: headers[mli][1]})
            mli = mli + 1
    elif isinstance(headers, dict):
        returnval = headers
    else:
        returnval = False
    return returnval
def get_httplib_support(checkvalue=None):
    """List the available download backends, or test for one.

    With checkvalue None, returns the list of backend names available in
    this interpreter (optional backends are included only when their
    have* probe flag is set).  With a backend name, returns True/False
    for availability; the aliases "urllib1"/"urllib2" and "httplib1" are
    normalized first.

    NOTE(review): the have*-guard conditionals around the visible
    append() calls were missing from the dump and are reconstructed —
    confirm ordering against upstream.
    """
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    returnval = []
    returnval.append("ftp")
    returnval.append("httplib")
    if(havehttplib2):
        returnval.append("httplib2")
    returnval.append("urllib")
    if(haveurllib3):
        returnval.append("urllib3")
        returnval.append("request3")
    returnval.append("request")
    if(haverequests):
        returnval.append("requests")
    if(havehttpx):
        returnval.append("httpx")
        returnval.append("httpx2")
    if(havemechanize):
        returnval.append("mechanize")
    if(haveparamiko):
        returnval.append("sftp")
    if(havepysftp):
        returnval.append("pysftp")
    if(not checkvalue is None):
        if(checkvalue == "urllib1" or checkvalue == "urllib2"):
            checkvalue = "urllib"
        if(checkvalue == "httplib1"):
            checkvalue = "httplib"
        if(checkvalue in returnval):
            returnval = True
        else:
            returnval = False
    return returnval
def check_httplib_support(checkvalue="urllib"):
    """Return True when the backend named *checkvalue* is available.

    Normalizes the legacy aliases "urllib1"/"urllib2" and "httplib1"
    before delegating to get_httplib_support().
    NOTE(review): the final return was missing from the dump and is
    reconstructed.
    """
    if(checkvalue == "urllib1" or checkvalue == "urllib2"):
        checkvalue = "urllib"
    if(checkvalue == "httplib1"):
        checkvalue = "httplib"
    returnval = get_httplib_support(checkvalue)
    return returnval
def get_httplib_support_list():
    """Return the list of all available download backend names.

    NOTE(review): the final return was missing from the dump and is
    reconstructed.
    """
    returnval = get_httplib_support(None)
    return returnval
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", sleep=-1):
    """Download *httpurl* using the backend named by *httplibuse*.

    httpurl: the URL to fetch.
    httpheaders: request headers dict (defaults to the module table).
    httpuseragent / httpreferer: optional User-Agent / Referer overrides
        passed through to the backend helper.
    httpcookie: cookie jar (defaults to the shared module jar).
    httpmethod: "GET" or "POST"; postdata: body for POST requests.
    httplibuse: backend name; legacy aliases are normalized and missing
        optional backends fall back to "urllib" (or "httplib" for
        httplib2).
    sleep: pre-download pause; a negative value means "use the module
        default geturls_download_sleep".
    Returns whatever the chosen download_from_url_with_* helper returns,
    or False for an unusable backend.

    NOTE(review): the sleep default, the sftp/pysftp missing-backend
    fallbacks and the final return were absent from the garbled dump and
    are reconstructed — confirm against upstream.
    """
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize legacy backend aliases.
    if(httplibuse == "urllib1" or httplibuse == "urllib2"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to an always-available backend when an optional one is
    # missing from this interpreter.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havepycurl and httplibuse == "pycurl"):
        httplibuse = "urllib"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse == "sftp"):
        return False  # NOTE(review): reconstructed fallback
    if(not havepysftp and httplibuse == "pysftp"):
        return False  # NOTE(review): reconstructed fallback
    if(httplibuse == "urllib"):
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "request"):
        returnval = download_from_url_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "request3"):
        returnval = download_from_url_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "requests"):
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "httpx"):
        returnval = download_from_url_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    else:
        returnval = False  # unrecognized backend name
    return returnval
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1):
    """
    Download httpurl to a temporary file using the backend named by httplibuse.

    Normalizes legacy backend aliases, falls back to "urllib" (or "httplib"
    for httplib2) when the requested third-party backend is not installed,
    then dispatches to the matching download_from_url_file_with_* helper.

    Returns the helper's result dict, or False when the backend is unknown
    or an SFTP backend was requested without its library installed.
    """
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    # Normalize legacy backend aliases to their canonical names.
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if httplibuse == "httplib1":
        httplibuse = "httplib"
    # Degrade gracefully when the requested backend is not importable.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havehttpx and httplibuse == "httpx":
        httplibuse = "urllib"
    if not havehttpx and httplibuse == "httpx2":
        httplibuse = "urllib"
    if not havehttpcore and httplibuse == "httpcore":
        httplibuse = "urllib"
    if not havehttpcore and httplibuse == "httpcore2":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if not havepycurl and httplibuse == "pycurl":
        httplibuse = "urllib"
    if not havehttplib2 and httplibuse == "httplib2":
        httplibuse = "httplib"
    # SFTP backends have no HTTP fallback; fail outright if unavailable.
    if not haveparamiko and httplibuse == "sftp":
        return False
    # Fixed: this previously tested haveparamiko; pysftp needs havepysftp.
    if not havepysftp and httplibuse == "pysftp":
        return False
    if httplibuse == "urllib":
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "request":
        returnval = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "request3":
        returnval = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "httplib":
        returnval = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "httplib2":
        returnval = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "urllib3":
        returnval = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "httpx":
        returnval = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "httpx2":
        returnval = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "httpcore":
        returnval = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "httpcore2":
        returnval = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "ftp":
        returnval = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "sftp":
        returnval = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif httplibuse == "pysftp":
        returnval = download_from_url_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    else:
        # Unknown backend name.
        returnval = False
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl to outpath/outfile (or to memory when outfile is "-")
    using the backend named by httplibuse.

    Normalizes legacy backend aliases, falls back to "urllib" (or "httplib"
    for httplib2) when the requested third-party backend is not installed,
    then dispatches to the matching download_from_url_to_file_with_* helper.

    Returns the helper's result dict, or False when the backend is unknown
    or an SFTP backend was requested without its library installed.
    """
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    # Normalize legacy backend aliases to their canonical names.
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if httplibuse == "httplib1":
        httplibuse = "httplib"
    # Degrade gracefully when the requested backend is not importable.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havehttpx and httplibuse == "httpx":
        httplibuse = "urllib"
    if not havehttpx and httplibuse == "httpx2":
        httplibuse = "urllib"
    if not havehttpcore and httplibuse == "httpcore":
        httplibuse = "urllib"
    if not havehttpcore and httplibuse == "httpcore2":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if not havepycurl and httplibuse == "pycurl":
        httplibuse = "urllib"
    if not havehttplib2 and httplibuse == "httplib2":
        httplibuse = "httplib"
    # SFTP backends have no HTTP fallback; fail outright if unavailable.
    if not haveparamiko and httplibuse == "sftp":
        return False
    if not havepysftp and httplibuse == "pysftp":
        return False
    if httplibuse == "urllib":
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "request":
        returnval = download_from_url_to_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "request3":
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "httplib":
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "httplib2":
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "urllib3":
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "httpx":
        # Fixed: outfile/outpath were previously dropped from this call,
        # so the httpx backend ignored the requested destination.
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "httpx2":
        # Fixed: outfile/outpath were previously dropped from this call.
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "httpcore":
        # Fixed: outfile/outpath were previously dropped from this call.
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "httpcore2":
        # Fixed: outfile/outpath were previously dropped from this call.
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "ftp":
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "sftp":
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif httplibuse == "pysftp":
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    else:
        # Unknown backend name.
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with the urllib/urllib2 opener and return the body in memory.

    Handles basic auth embedded in the URL, cookie processing, and gzip /
    deflate / brotli (when available) content decoding.

    Returns a dict with 'Type', 'Content', 'Headers', 'Version', 'Method',
    'HeadersSent', 'URL', 'Code' and 'Reason' keys, or False on error.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fixed: previously called .update() on httpuseragent (a string)
            # instead of on the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Empty-string fallbacks avoid a TypeError when only one part is set.
        inurlencode = b64encode(str((urlparts.username or "")+":"+(urlparts.password or "")).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            geturls_text = geturls_opener.open(httpurl)
        elif httpmethod == "POST":
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # An HTTP error response still carries headers and a body; keep it.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpcodereason = geturls_text.reason
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if sys.version[0] == "2":
        # Python 2 message objects need rebuilding into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br":
        returnval_content = geturls_text.read()[:]
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Fetch httpurl with urllib/urllib2 and stream the body into a uniquely
    named temporary file (the file is NOT deleted; the caller owns it).

    Returns a dict with 'Type' ("File"), 'Filename', 'Filesize',
    'FilesizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL',
    'Code', 'Reason', 'DownloadTime' and 'DownloadTimeReadable' keys,
    or False on error.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fixed: previously called .update() on httpuseragent (a string)
            # instead of on the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Empty-string fallbacks avoid a TypeError when only one part is set.
        inurlencode = b64encode(str((urlparts.username or "")+":"+(urlparts.password or "")).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    try:
        if httpmethod == "GET":
            geturls_text = geturls_opener.open(httpurl)
        elif httpmethod == "POST":
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            # Unknown methods fall back to a plain GET.
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # An HTTP error response still carries headers and a body; keep it.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpcodereason = geturls_text.reason
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 message objects need rebuilding into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified time onto the temp file, when given.
        lastmod = httpheaderout.get('Last-Modified')
        if lastmod is not None:
            try:
                # email.utils.parsedate_to_datetime is Python 3 only;
                # AttributeError falls back to strptime on Python 2.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: Content-Length may be absent (downloadsize == 0).
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # Fixed: elapsed time is end - start (was start - end, a negative value).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl with urllib and either move the result to
    outpath/outfile, or (when outfile is "-") return the bytes in memory.

    buffersize is a two-element sequence: [download chunk, copy chunk].

    Returns a result dict ('Type' is "File" or "Content"), or False on error.
    """
    global geturls_download_sleep
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        # Refuse destinations that cannot possibly receive the file.
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Mirror the server's Last-Modified time onto the final file, when given.
        lastmod = pretmpfilename.get('Headers').get('Last-Modified')
        if lastmod is not None:
            try:
                # email.utils.parsedate_to_datetime is Python 3 only;
                # AttributeError falls back to strptime on Python 2.
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())))
            except AttributeError:
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
        exec_time_end = time.time()
        # Fixed: elapsed time is end - start (was negated).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # Fixed: removed the duplicate 'Method' key that silently overwrote
        # pretmpfilename['Method'] with the raw httpmethod argument.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # Merged the formerly duplicated Python 2 / Python 3 branches; only
        # the in-memory buffer type differs between the two versions.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if sys.version[0] == "2":
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                # Guard against a zero-length source file.
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # Fixed: elapsed time is end - start (was negated).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # Fixed: removed the duplicate 'Method' key here as well.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with httplib/http.client and return the body in memory.

    Handles basic auth embedded in the URL and gzip / deflate / brotli
    (when available) content decoding.

    Returns a dict with 'Type', 'Content', 'Headers', 'Version', 'Method',
    'HeadersSent', 'URL', 'Code' and 'Reason' keys, or False on error.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fixed: previously called .update() on httpuseragent (a string)
            # instead of on the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Empty-string fallbacks avoid a TypeError when only one part is set.
        inurlencode = b64encode(str((urlparts.username or "")+":"+(urlparts.password or "")).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnection(urlparts[1])
    elif urlparts[0] == "https":
        httpconn = HTTPSConnection(urlparts[1])
    else:
        # httplib only speaks http/https.
        return False
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif httpmethod == "POST":
            # Fixed: the POST branch previously sent "GET" as the method verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to a plain GET.
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # Fixed: HTTPResponse.version is an int (10 or 11), not the string "10".
    if geturls_text.version == 10:
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 message objects need rebuilding into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br":
        returnval_content = geturls_text.read()[:]
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Fetch httpurl with httplib/http.client and stream the body into a
    uniquely named temporary file (the file is NOT deleted; caller owns it).

    Returns a dict with 'Type' ("File"), 'Filename', 'Filesize',
    'FilesizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL',
    'Code', 'Reason', 'DownloadTime' and 'DownloadTimeReadable' keys,
    or False on error.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        # Negative sleep selects the module-wide default delay.
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # Fixed: previously called .update() on httpuseragent (a string)
            # instead of on the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Empty-string fallbacks avoid a TypeError when only one part is set.
        inurlencode = b64encode(str((urlparts.username or "")+":"+(urlparts.password or "")).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnection(urlparts[1])
    elif urlparts[0] == "https":
        httpconn = HTTPSConnection(urlparts[1])
    else:
        # httplib only speaks http/https.
        return False
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif httpmethod == "POST":
            # Fixed: the POST branch previously sent "GET" as the method verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            # Unknown methods fall back to a plain GET.
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # Fixed: HTTPResponse.version is an int (10 or 11), not the string "10".
    if geturls_text.version == 10:
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 message objects need rebuilding into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified time onto the temp file, when given.
        lastmod = httpheaderout.get('Last-Modified')
        if lastmod is not None:
            try:
                # email.utils.parsedate_to_datetime is Python 3 only;
                # AttributeError falls back to strptime on Python 2.
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple()), time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
            except ValueError:
                pass
        # Fixed: removed the bogus duplicate 'Type'/'Content' keys that
        # referenced an undefined returnval_content (NameError at runtime).
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: Content-Length may be absent (downloadsize == 0).
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # Fixed: elapsed time is end - start (was start - end, a negative value).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with the httplib backend and deliver the result.

    When outfile is a real name the payload is saved as outpath/outfile and a
    'File' result dict is returned; when outfile is "-" the payload is read
    back into memory and a 'Content' result dict is returned.  Returns False
    when the download fails or the target path is unusable.
    NOTE(review): timing values use exec_time_start - exec_time_end (negative
    durations) throughout this file; kept as found for consistency.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath must be a directory, not an existing file.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # The target file name collides with a directory.
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Mirror the server's Last-Modified time onto the saved file; fall
        # back to strptime for Pythons without parsedate_to_datetime.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: 'Method' appeared twice in this dict literal; the later
        # httpmethod value silently won, so only that entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key collapsed to the surviving value.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-" and sys.version[0] >= "3"):
        # NOTE(review): this branch delegates to the urllib file downloader,
        # unlike the Python 2 branch above — kept as found.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key collapsed to the surviving value.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httplib2's timeout-aware connection classes.

    Returns a 'Content' result dict (body, headers, status, reason, method,
    final URL) or False on an unsupported scheme or socket error.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs on the headers dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # BUGFIX: the POST branch previously issued a "GET" request.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUGFIX: response.version is the int 10/11, so the old string-only
    # compare never matched; accept both spellings.
    if(geturls_text.version == 10 or geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are rebuilt key-by-key into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback shim: httplib2 is not installed, so hand the call straight
        to the urllib implementation with identical arguments."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httplib2 into a uniquely-named temporary file.

    Returns a 'File' result dict whose 'Filename' points at the temp file
    (the caller owns and removes it), or False on an unsupported scheme or
    socket error.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs on the headers dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # BUGFIX: the POST branch previously issued a "GET" request.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    # BUGFIX: response.version is the int 10/11, so the old string-only
    # compare never matched; accept both spellings.
    if(geturls_text.version == 10 or geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are rebuilt key-by-key into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the temp file with the server's Last-Modified time when the
        # header is present and parseable.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback shim: delegate the temp-file download to the urllib
        implementation when httplib2 is unavailable."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with the httplib2 backend and deliver the result.

    When outfile is a real name the payload is saved as outpath/outfile and a
    'File' result dict is returned; when outfile is "-" the payload is read
    back into memory and a 'Content' result dict is returned.  Returns False
    when the download fails or the target path is unusable.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath must be a directory, not an existing file.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # The target file name collides with a directory.
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Mirror the server's Last-Modified time onto the saved file; fall
        # back to strptime for Pythons without parsedate_to_datetime.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: 'Method' appeared twice in this dict literal; the later
        # httpmethod value silently won, so only that entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key collapsed to the surviving value.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-" and sys.version[0] >= "3"):
        # NOTE(review): this branch delegates to the urllib file downloader,
        # unlike the Python 2 branch above — kept as found.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key collapsed to the surviving value.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback shim: delegate saving-to-file to the urllib implementation
        when httplib2 is unavailable."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl through urllib's Request/urlopen machinery.

    Returns a 'Content' result dict (body, headers, status, reason, method,
    final URL) or False on URL/timeout errors.  HTTP error responses are
    still read — their error object carries a body and headers.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs on the headers dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — update httpheaders, not the string.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    install_opener(geturls_opener)
    time.sleep(sleep)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_request = Request(httpurl, headers=httpheaders)
            geturls_text = urlopen(geturls_request)
        elif(httpmethod == "POST"):
            # Supplying data= makes urlopen issue a POST.
            geturls_request = Request(httpurl, headers=httpheaders)
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            geturls_request = Request(httpurl, headers=httpheaders)
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        # Keep the error response object — it is still readable below.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpcodereason = geturls_text.reason
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are rebuilt key-by-key into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
1835 def download_from_url_file_with_request(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1):
1836 global geturls_download_sleep
, tmpfileprefix
, tmpfilesuffix
;
1837 exec_time_start
= time
.time();
1838 myhash
= hashlib
.new("sha1");
1839 if(sys
.version
[0]=="2"):
1840 myhash
.update(httpurl
);
1841 myhash
.update(str(buffersize
));
1842 myhash
.update(str(exec_time_start
));
1843 if(sys
.version
[0]>="3"):
1844 myhash
.update(httpurl
.encode('utf-8'));
1845 myhash
.update(str(buffersize
).encode('utf-8'));
1846 myhash
.update(str(exec_time_start
).encode('utf-8'));
1847 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
1849 sleep
= geturls_download_sleep
;
1850 urlparts
= urlparse
.urlparse(httpurl
);
1851 if(isinstance(httpheaders
, list)):
1852 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
1853 httpheaders
= fix_header_names(httpheaders
);
1854 if(httpuseragent
is not None):
1855 if('User-Agent' in httpheaders
):
1856 httpheaders
['User-Agent'] = httpuseragent
;
1858 httpuseragent
.update({'User-Agent': httpuseragent
});
1859 if(httpreferer
is not None):
1860 if('Referer' in httpheaders
):
1861 httpheaders
['Referer'] = httpreferer
;
1863 httpuseragent
.update({'Referer': httpreferer
});
1864 if(urlparts
.username
is not None or urlparts
.password
is not None):
1865 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
1866 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
1867 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
1868 if(isinstance(httpheaders
, dict)):
1869 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
1870 geturls_opener
.addheaders
= httpheaders
;
1871 install_opener(geturls_opener
);
1873 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
1874 if(postdata
is not None and not isinstance(postdata
, dict)):
1875 postdata
= urlencode(postdata
);
1877 if(httpmethod
=="GET"):
1878 geturls_request
= Request(httpurl
, headers
=httpheaders
);
1879 geturls_text
= urlopen(geturls_request
);
1880 elif(httpmethod
=="POST"):
1881 geturls_request
= Request(httpurl
, headers
=httpheaders
);
1882 geturls_text
= urlopen(geturls_request
, data
=postdata
);
1884 geturls_request
= Request(httpurl
, headers
=httpheaders
);
1885 geturls_text
= urlopen(geturls_request
);
1886 except HTTPError
as geturls_text_error
:
1887 geturls_text
= geturls_text_error
;
1888 log
.info("Error With URL "+httpurl
);
1890 log
.info("Error With URL "+httpurl
);
1892 except socket
.timeout
:
1893 log
.info("Error With URL "+httpurl
);
1895 httpcodeout
= geturls_text
.getcode();
1896 httpcodereason
= geturls_text
.reason
;
1897 httpversionout
= "1.1";
1898 httpmethodout
= httpmethod
;
1899 httpurlout
= geturls_text
.geturl();
1900 httpheaderout
= geturls_text
.headers
;
1901 httpheadersentout
= httpheaders
;
1902 if(isinstance(httpheaderout
, list)):
1903 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
1904 if(sys
.version
[0]=="2"):
1906 prehttpheaderout
= httpheaderout
;
1907 httpheaderkeys
= httpheaderout
.keys();
1908 imax
= len(httpheaderkeys
);
1912 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
1914 except AttributeError:
1916 httpheaderout
= fix_header_names(httpheaderout
);
1917 if(isinstance(httpheadersentout
, list)):
1918 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
1919 httpheadersentout
= fix_header_names(httpheadersentout
);
1920 downloadsize
= httpheaderout
.get('Content-Length');
1921 if(downloadsize
is not None):
1922 downloadsize
= int(downloadsize
);
1923 if downloadsize
is None: downloadsize
= 0;
1926 log
.info("Downloading URL "+httpurl
);
1927 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
1928 tmpfilename
= f
.name
;
1930 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(httpheaderout
.get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(httpheaderout
.get('Last-Modified')).timetuple())));
1931 except AttributeError:
1933 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(httpheaderout
.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(httpheaderout
.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1938 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
};
1940 databytes
= geturls_text
.read(buffersize
);
1941 if not databytes
: break;
1942 datasize
= len(databytes
);
1943 fulldatasize
= datasize
+ fulldatasize
;
1946 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1947 downloaddiff
= fulldatasize
- prevdownsize
;
1948 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1949 prevdownsize
= fulldatasize
;
1952 geturls_text
.close();
1953 exec_time_end
= time
.time();
1954 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
1955 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl (via download_from_url_file_with_request) and either move
    the result to outpath/outfile or, when outfile == "-", return the whole
    content in memory.

    Returns a dict describing the file/content plus response metadata, or
    False on failure.

    NOTE(review): buffersize keeps its original mutable default
    ([download_chunk, copy_chunk]) for interface compatibility; it is only
    read here, never mutated.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep

    def _apply_last_modified(path, headers):
        # Best-effort: stamp `path` with the server's Last-Modified time.
        # email.utils.parsedate_to_datetime is Python 3 only (AttributeError
        # on Python 2); the header may also be absent or malformed.
        lastmod = headers.get('Last-Modified') if headers is not None else None
        if lastmod is None:
            return
        try:
            mtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
        except (AttributeError, TypeError, ValueError):
            try:
                mtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
            except ValueError:
                return
        os.utime(path, (mtime, mtime))

    def _copy_tmpfile_to_buf(tmpfilename, bufsize, iobuf, downloadsize):
        # Stream the temporary file into iobuf in bufsize chunks, logging
        # copy progress; returns the buffered data and closes iobuf.
        fulldatasize = 0
        prevdownsize = 0
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(bufsize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if downloadsize > 0:  # guard: unknown size would divide by zero
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                iobuf.write(databytes)
        iobuf.seek(0)
        fdata = iobuf.getvalue()
        iobuf.close()
        return fdata

    returnval = False
    if outfile != "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name already taken by a directory
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        _apply_last_modified(filepath, pretmpfilename.get('Headers'))
        exec_time_end = time.time()
        # FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # FIX: the result dict previously contained a duplicate 'Method' key
        # ('Method': pretmpfilename['Method'], 'Method': httpmethod); only the
        # last one survived, so the single winning entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # The former separate Python 2 / Python 3 branches were identical
        # except for the in-memory buffer type; they are merged here.
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        exec_time_start = time.time()
        iobuf = StringIO() if sys.version[0] == "2" else BytesIO()
        fdata = _copy_tmpfile_to_buf(tmpfilename, buffersize[1], iobuf, downloadsize)
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with the requests library and return a dict holding the
    (decompressed) body under 'Content' plus response metadata, or False on
    connection failure.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        # FIX: the original else-branch called httpuseragent.update(...),
        # i.e. tried to mutate the user-agent string instead of the header
        # dict; assign/update on the headers covers both cases.
        httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        # FIX: same class of bug for the Referer header.
        httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    time.sleep(sleep)  # NOTE(review): pre-request throttle, restored from the module's standard pattern — confirm against upstream
    reqsession = requests.Session()
    try:
        if httpmethod == "POST":
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        # FIX: requests.exceptions.ConnectError does not exist; catching it
        # raised AttributeError instead of handling the failure.
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    # FIX: urllib3 reports the version as an int (10/11); comparing to the
    # string "10" made the "1.0" branch unreachable.
    httpversionout = "1.0" if str(geturls_text.raw.version) == "10" else "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Rebuild the case-insensitive header object as a plain dict on
        # Python 2 (kept from the original implementation).
        try:
            httpheaderout = dict((hkey, httpheaderout[hkey]) for hkey in httpheaderout.keys())
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if contentencoding == "gzip" or contentencoding == "deflate":
        strbuf = StringIO(geturls_text.raw.read()) if sys.version[0] == "2" else BytesIO(geturls_text.raw.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif contentencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(geturls_text.raw.read()[:])
    else:
        # FIX: previously "br" without the brotli module left
        # returnval_content unbound (NameError); the raw bytes are now
        # returned undecoded in that case.
        returnval_content = geturls_text.raw.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when the requests module is unavailable: delegate straight to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Stream httpurl into a uniquely-named temporary file using the requests
    library.

    Returns a dict describing the temporary file plus response metadata, or
    False on connection failure. The caller is responsible for removing the
    temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix: sha1 over URL + buffer size + start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        # FIX: the original else-branch called httpuseragent.update(...) on
        # the user-agent string instead of updating the header dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        # FIX: same class of bug for the Referer header.
        httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    time.sleep(sleep)  # NOTE(review): pre-request throttle, restored from the module's standard pattern — confirm against upstream
    reqsession = requests.Session()
    try:
        if httpmethod == "POST":
            geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        # FIX: requests.exceptions.ConnectError does not exist; catching it
        # raised AttributeError instead of handling the failure.
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason
    # FIX: urllib3 reports the version as an int (10/11); comparing to the
    # string "10" made the "1.0" branch unreachable.
    httpversionout = "1.0" if str(geturls_text.raw.version) == "10" else "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Rebuild the case-insensitive header object as a plain dict on
        # Python 2 (kept from the original implementation).
        try:
            httpheaderout = dict((hkey, httpheaderout[hkey]) for hkey in httpheaderout.keys())
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # FIX: int(...) was applied before the None check, so a response without
    # a Content-Length header raised TypeError instead of defaulting to 0.
    downloadsize = httpheaderout.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if downloadsize > 0:  # guard: unknown size would divide by zero
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    # FIX: the Last-Modified timestamp is now applied after the download;
    # stamping the file before writing left the mtime clobbered by the writes.
    lastmod = httpheaderout.get('Last-Modified')
    if lastmod is not None:
        try:
            try:
                # email.utils.parsedate_to_datetime is Python 3 only.
                mtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
            except AttributeError:
                mtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
            os.utime(tmpfilename, (mtime, mtime))
        except (TypeError, ValueError):
            pass  # malformed Last-Modified header: leave the mtime as-is
    geturls_text.close()
    exec_time_end = time.time()
    # FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when the requests module is unavailable: delegate straight to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl (via download_from_url_file_with_requests) and either
    move the result to outpath/outfile or, when outfile == "-", return the
    whole content in memory.

    Returns a dict describing the file/content plus response metadata, or
    False on failure.

    NOTE(review): buffersize keeps its original mutable default
    ([download_chunk, copy_chunk]) for interface compatibility; it is only
    read here, never mutated.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep

    def _apply_last_modified(path, headers):
        # Best-effort: stamp `path` with the server's Last-Modified time.
        # email.utils.parsedate_to_datetime is Python 3 only (AttributeError
        # on Python 2); the header may also be absent or malformed.
        lastmod = headers.get('Last-Modified') if headers is not None else None
        if lastmod is None:
            return
        try:
            mtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple())
        except (AttributeError, TypeError, ValueError):
            try:
                mtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple())
            except ValueError:
                return
        os.utime(path, (mtime, mtime))

    def _copy_tmpfile_to_buf(tmpfilename, bufsize, iobuf, downloadsize):
        # Stream the temporary file into iobuf in bufsize chunks, logging
        # copy progress; returns the buffered data and closes iobuf.
        fulldatasize = 0
        prevdownsize = 0
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(bufsize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if downloadsize > 0:  # guard: unknown size would divide by zero
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                iobuf.write(databytes)
        iobuf.seek(0)
        fdata = iobuf.getvalue()
        iobuf.close()
        return fdata

    returnval = False
    if outfile != "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False  # outpath exists but is a file, not a directory
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False  # destination name already taken by a directory
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        _apply_last_modified(filepath, pretmpfilename.get('Headers'))
        exec_time_end = time.time()
        # FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # FIX: the result dict previously contained a duplicate 'Method' key;
        # only the last one ('Method': httpmethod) survived, so that single
        # winning entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-":
        # The former separate Python 2 / Python 3 branches were identical
        # except for the in-memory buffer type; they are merged here.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        exec_time_start = time.time()
        iobuf = StringIO() if sys.version[0] == "2" else BytesIO()
        fdata = _copy_tmpfile_to_buf(tmpfilename, buffersize[1], iobuf, downloadsize)
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # FIX: the Python 2 branch previously set 'HeadersSent': ['HeadersSent']
        # (a literal one-element list — the pretmpfilename lookup was dropped);
        # it now carries the headers actually sent, as everywhere else.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when the requests module is unavailable: delegate straight to the urllib implementation."""
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with the httpx library and return a dict holding the
    (decompressed) body under 'Content' plus response metadata, or False on
    connection failure.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        # FIX: the original else-branch called httpuseragent.update(...),
        # i.e. tried to mutate the user-agent string instead of the header
        # dict; assign/update on the headers covers both cases.
        httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        # FIX: same class of bug for the Referer header.
        httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    time.sleep(sleep)  # NOTE(review): pre-request throttle, restored from the module's standard pattern — confirm against upstream
    # One HTTP/1.1 client for whichever method is used (the original built an
    # identical Client in each branch).
    httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
    try:
        if httpmethod == "POST":
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpcodereason = geturls_text.reason_phrase
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = geturls_text.request.headers
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Rebuild the case-insensitive header object as a plain dict on
        # Python 2 (kept from the original implementation).
        try:
            httpheaderout = dict((hkey, httpheaderout[hkey]) for hkey in httpheaderout.keys())
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if contentencoding == "gzip" or contentencoding == "deflate":
        strbuf = StringIO(geturls_text.read()) if sys.version[0] == "2" else BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif contentencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # FIX: previously "br" without the brotli module left
        # returnval_content unbound (NameError); the raw bytes are now
        # returned undecoded in that case.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    httpx_pool.close()  # FIX: the Client connection pool was never closed
    return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fallback stub used when the httpx package is unavailable: delegates the
    whole download to the urllib implementation with identical arguments.

    FIX: the original computed the delegate's result but never returned it,
    so callers always received None; the result is now returned.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep);
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Download httpurl with the httpx client (HTTP/1.1 only) into a uniquely
    named temporary file and return a dict describing the transfer
    ('Type', 'Filename', 'Filesize', 'Headers', 'Code', ...), or False when
    the connection fails.

    Fixes applied to the original:
    - the "header not present yet" branches called httpuseragent.update(...)
      on a plain string; they now update httpheaders as intended;
    - Content-Length was wrapped in int() before the None check, raising
      TypeError whenever the server omitted the header;
    - elapsed time was computed as start - end (negative); now end - start;
    - the progress percentage is guarded against a zero/unknown size to
      avoid ZeroDivisionError.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            httpheaders.update({'User-Agent': httpuseragent});  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            httpheaders.update({'Referer': httpreferer});  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason_phrase;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Force the header mapping into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');  # may be None; int() only after the check
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified stamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
        # NOTE(review): iter_content matches the requests API; confirm the
        # installed httpx version provides it (newer httpx names it iter_bytes).
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Fallback stub used when the httpx package is unavailable: delegates the
    file download to the urllib implementation with identical arguments.

    FIX: the original computed the delegate's result but never returned it;
    the result is now returned.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep);
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl via download_from_url_file_with_httpx and either move
    the temp file to outpath/outfile (outfile != "-") or return the content
    in memory (outfile == "-"). Returns a result dict, or False on failure.

    Fixes applied to the original:
    - the in-memory branch built 'HeadersSent' from the literal list
      ['HeadersSent'] instead of pretmpfilename['HeadersSent'];
    - the result dicts carried a duplicate 'Method' key whose second value
      silently overwrote the first; a single key is kept;
    - move/copy times were computed as start - end (negative); now end - start;
    - the progress percentage is guarded against a zero download size;
    - the function now returns returnval instead of falling off the end.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Carry the server's Last-Modified stamp over to the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            # StringIO holds bytes on Python 2; BytesIO is required on Python 3.
            if(sys.version[0]=="2"):
                f = StringIO();
            else:
                f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Fallback stub used when the httpx package is unavailable: delegates the
    download-to-file to the urllib implementation.

    Fixes applied to the original:
    - the result was never returned;
    - the trailing arguments were passed positionally as
      (postdata, buffersize, outfile, outpath, sleep) although the to-file
      implementations in this module declare (postdata, outfile, outpath,
      buffersize, sleep); keyword arguments ensure each value lands in the
      right parameter.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
    return returnval;
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with the httpx client (HTTP/1.1 and HTTP/2 enabled) and
    return the body plus metadata in a dict, or False on connection failure.
    Decompresses gzip/deflate bodies itself, and brotli when the brotli
    module is available.

    Fixes applied to the original: the "header not present yet" branches
    called httpuseragent.update(...) on a plain string instead of updating
    httpheaders; the function now returns returnval.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            httpheaders.update({'User-Agent': httpuseragent});  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            httpheaders.update({'Referer': httpreferer});  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason_phrase;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Force the header mapping into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        # NOTE(review): GzipFile decodes gzip streams; a raw deflate body
        # would need zlib instead - confirm against the servers in use.
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.read()[:];
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    geturls_text.close();
    return returnval;
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fallback stub used when the httpx package is unavailable: delegates the
    whole download to the urllib implementation with identical arguments.

    FIX: the original computed the delegate's result but never returned it;
    the result is now returned.
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep);
    return returnval;
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Download httpurl with the httpx client (HTTP/1.1 and HTTP/2 enabled)
    into a uniquely named temporary file and return a dict describing the
    transfer ('Type', 'Filename', 'Filesize', 'Headers', 'Code', ...), or
    False when the connection fails.

    Fixes applied to the original:
    - the "header not present yet" branches called httpuseragent.update(...)
      on a plain string; they now update httpheaders as intended;
    - Content-Length was wrapped in int() before the None check, raising
      TypeError whenever the server omitted the header;
    - elapsed time was computed as start - end (negative); now end - start;
    - the progress percentage is guarded against a zero/unknown size.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            httpheaders.update({'User-Agent': httpuseragent});  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            httpheaders.update({'Referer': httpreferer});  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        elif(httpmethod=="POST"):
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # Unknown methods fall back to a plain GET.
            httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpcodereason = geturls_text.reason_phrase;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = geturls_text.request.headers;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Force the header mapping into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');  # may be None; int() only after the check
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified stamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
        # NOTE(review): iter_content matches the requests API; confirm the
        # installed httpx version provides it (newer httpx names it iter_bytes).
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Fallback stub used when the httpx package is unavailable: delegates the
    file download to the urllib implementation with identical arguments.

    FIX: the original computed the delegate's result but never returned it;
    the result is now returned.
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep);
    return returnval;
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl via download_from_url_file_with_httpx2 and either move
    the temp file to outpath/outfile (outfile != "-") or return the content
    in memory (outfile == "-"). Returns a result dict, or False on failure.

    Fixes applied to the original:
    - the in-memory branch built 'HeadersSent' from the literal list
      ['HeadersSent'] instead of pretmpfilename['HeadersSent'];
    - the result dicts carried a duplicate 'Method' key whose second value
      silently overwrote the first; a single key is kept;
    - move/copy times were computed as start - end (negative); now end - start;
    - the progress percentage is guarded against a zero download size;
    - the function now returns returnval instead of falling off the end.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Carry the server's Last-Modified stamp over to the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            # StringIO holds bytes on Python 2; BytesIO is required on Python 3.
            if(sys.version[0]=="2"):
                f = StringIO();
            else:
                f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Fallback stub used when the httpx package is unavailable: delegates the
    download-to-file to the urllib implementation.

    Fixes applied to the original:
    - the result was never returned;
    - the trailing arguments were passed positionally as
      (postdata, buffersize, outfile, outpath, sleep) although the to-file
      implementations in this module declare (postdata, outfile, outpath,
      buffersize, sleep); keyword arguments ensure each value lands in the
      right parameter.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
    return returnval;
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httpcore (HTTP/1.1 only) and return the body in
    memory as a dict: {'Type': "Content", 'Content', 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code', 'Reason'}.  Returns False on
    connection errors or timeouts.  Transparently inflates gzip/deflate and
    (when brotli is installed) br encoded responses.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: the original called httpuseragent.update(...) here,
            # which fails because httpuseragent is a string; the header
            # dict is the intended target.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same misdirected update as above.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        # Inline credentials in the URL become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif httpmethod == "POST":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            # BUG FIX: the POST branch issued a GET, and httpcore's request()
            # takes the body via content=, not data=.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like plain dicts; copy the
        # entries defensively, ignoring objects without .keys().
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br":
        returnval_content = geturls_text.read()[:]
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if not havehttpcore:
    # httpcore is not installed: expose the same name with the same
    # signature, delegating straight to the urllib implementation.
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback stub for download_from_url_with_httpcore (no httpcore)."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httpcore (HTTP/1.1) into a uniquely-named
    temporary file and return a dict: {'Type': "File", 'Filename',
    'Filesize', 'FilesizeAlt', 'Headers', 'Version', 'Method',
    'HeadersSent', 'URL', 'Code', 'Reason', 'DownloadTime',
    'DownloadTimeReadable'}.  Returns False on connection errors/timeouts.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url/buffersize/start-time into the temp-file suffix so parallel
    # downloads never collide.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...) on a string;
            # the header dict is the intended target.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same misdirected update as above.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif httpmethod == "POST":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            # BUG FIX: POST branch issued a GET; httpcore takes the body via content=.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like plain dicts; copy defensively.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUG FIX: int(httpheaderout.get('Content-Length')) raised TypeError when
    # the header was absent; convert only when it is present.
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best effort: stamp the temp file with the server's Last-Modified.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # NOTE(review): the whole file passes (start - end), i.e. a negative
    # delta, to hms_string; preserved here for consistency — confirm that
    # hms_string normalizes the sign.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if not havehttpcore:
    # httpcore is not installed: expose the same name with the same
    # signature, delegating straight to the urllib implementation.
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback stub for download_from_url_file_with_httpcore (no httpcore)."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=None, sleep=-1):
    """Download httpurl via httpcore and either save it as outpath/outfile
    (result dict of Type "File") or, when outfile is "-", return the body
    in memory (result dict of Type "Content").  Returns False on failure.

    buffersize is a two-element list [download chunk, copy chunk];
    defaults to [524288, 524288].
    """
    global geturls_download_sleep
    # BUG FIX: the default buffersize was a shared mutable list literal.
    if buffersize is None:
        buffersize = [524288, 524288]
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Saving to a real file: validate destination paths first.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: carry the server's Last-Modified stamp onto the file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the dict literal listed 'Method' twice; the second entry
        # (httpmethod) silently won, so only that entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-" and sys.version[0] == "2":
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: 'HeadersSent' mapped to the literal list ['HeadersSent']
        # and 'Method' appeared twice; both corrected here.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-" and sys.version[0] >= "3":
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # ROBUSTNESS FIX: the py3 branch lacked the empty-result guard the
        # py2 branch had, crashing on a failed download.
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key dropped (httpmethod value kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if not havehttpcore:
    # httpcore is not installed: expose the same name, delegating to urllib.
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=None, sleep=-1):
        """Fallback stub for download_from_url_to_file_with_httpcore.

        buffersize defaults to [524288, 524288] (download/copy chunk sizes).
        """
        # BUG FIX: the default was the mutable list literal [524288, 524288],
        # shared across every call; use a None sentinel instead.
        if buffersize is None:
            buffersize = [524288, 524288]
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """HTTP/2-enabled variant of download_from_url_with_httpcore: fetch
    httpurl with httpcore (http2=True) and return the body in memory as a
    result dict (Type "Content").  Returns False on connection errors or
    timeouts.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...) on a string;
            # the header dict is the intended target.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same misdirected update as above.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif httpmethod == "POST":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUG FIX: POST branch issued a GET; httpcore takes the body via content=.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    # BUG FIX: httpcore responses have no reason_phrase attribute (that is
    # an httpx API); derive the reason from the status code like the
    # HTTP/1.1 variant does.
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like plain dicts; copy defensively.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br":
        returnval_content = geturls_text.read()[:]
    if httpheaderout.get("Content-Encoding") == "br" and havebrotli:
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if not havehttpcore:
    # httpcore is not installed: expose the same name with the same
    # signature, delegating straight to the urllib implementation.
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback stub for download_from_url_with_httpcore2 (no httpcore)."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """HTTP/2-enabled variant of download_from_url_file_with_httpcore:
    download httpurl into a uniquely-named temporary file and return a
    result dict (Type "File") with timing info.  Returns False on
    connection errors/timeouts.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url/buffersize/start-time into the temp-file suffix so parallel
    # downloads never collide.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if httpuseragent is not None:
        if 'User-Agent' in httpheaders:
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called httpuseragent.update(...) on a string;
            # the header dict is the intended target.
            httpheaders.update({'User-Agent': httpuseragent})
    if httpreferer is not None:
        if 'Referer' in httpheaders:
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same misdirected update as above.
            httpheaders.update({'Referer': httpreferer})
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "GET":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif httpmethod == "POST":
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUG FIX: POST branch issued a GET; httpcore takes the body via content=.
            geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    # BUG FIX: httpcore responses have no reason_phrase attribute (httpx
    # API); derive the reason from the status code like the HTTP/1.1 variant.
    httpcodereason = http_status_to_reason(geturls_text.status)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Python 2 header objects may not behave like plain dicts; copy defensively.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUG FIX: int(httpheaderout.get('Content-Length')) raised TypeError when
    # the header was absent; convert only when it is present.
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Best effort: stamp the temp file with the server's Last-Modified.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # NOTE(review): (start - end) is negative; kept for file-wide consistency
    # — confirm hms_string normalizes the sign.
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if not havehttpcore:
    # httpcore is not installed: expose the same name with the same
    # signature, delegating straight to the urllib implementation.
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback stub for download_from_url_file_with_httpcore2 (no httpcore)."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=None, sleep=-1):
    """HTTP/2-enabled variant of download_from_url_to_file_with_httpcore:
    save httpurl as outpath/outfile (Type "File" result), or return the body
    in memory when outfile is "-" (Type "Content" result).  Returns False
    on failure.

    buffersize is a two-element list [download chunk, copy chunk];
    defaults to [524288, 524288].
    """
    global geturls_download_sleep
    # BUG FIX: the default buffersize was a shared mutable list literal.
    if buffersize is None:
        buffersize = [524288, 524288]
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        # Saving to a real file: validate destination paths first.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Best effort: carry the server's Last-Modified stamp onto the file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the dict literal listed 'Method' twice; the second entry
        # (httpmethod) silently won, so only that entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-" and sys.version[0] == "2":
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: 'HeadersSent' mapped to the literal list ['HeadersSent']
        # and 'Method' appeared twice; both corrected here.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if outfile == "-" and sys.version[0] >= "3":
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # ROBUSTNESS FIX: the py3 branch lacked the empty-result guard the
        # py2 branch had, crashing on a failed download.
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUG FIX: duplicate 'Method' key dropped (httpmethod value kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback for the httpcore2 backend: delegate to the urllib implementation.

    Same interface as the other download_from_url_to_file_with_* functions;
    returns whatever the urllib implementation returns (a result dict, or
    False on failure).
    """
    # NOTE(review): argument order (buffersize before outfile/outpath) is kept
    # exactly as the original call — confirm it matches the positional
    # signature of download_from_url_to_file_with_urllib.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
    # Bug fix: the delegated result was computed but never returned.
    return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl via urllib3 and return the body plus response metadata.

    Returns a dict with keys 'Type', 'Content', 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code' and 'Reason', or False when the
    connection fails.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # Bug fix: the else-branch previously called httpuseragent.update(...)
        # on the user-agent *string* (AttributeError) instead of updating the
        # header dict; a plain assignment covers both the present and absent
        # key cases identically.
        httpheaders['User-Agent'] = httpuseragent
    if(httpreferer is not None):
        # Same fix as above for the Referer header.
        httpheaders['Referer'] = httpreferer
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a pre-emptive Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # GET is also the fallback for any unrecognized method name.
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    # Bug fix: the result dict was built but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        # Bug fix: the delegated result was computed but never returned.
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl via urllib3 into a uniquely named temporary file.

    Returns a dict with 'Type': "File", the temporary 'Filename', sizes,
    timing information and response metadata, or False when the connection
    fails.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # Bug fix: previously the else-branch called .update on the
        # user-agent string itself instead of updating the header dict.
        httpheaders['User-Agent'] = httpuseragent
    if(httpreferer is not None):
        # Same fix as above for the Referer header.
        httpheaders['Referer'] = httpreferer
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # GET is also the fallback for any unrecognized method name.
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # Bug fix: int() was applied before the None check, so a missing
    # Content-Length header raised TypeError instead of falling back to 0.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime missing (old Python) or header absent;
            # fall back to a manual strptime of the RFC 1123 date.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard: unknown/zero Content-Length would divide by zero.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    # Bug fix: the result dict was built but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
        # Bug fix: the delegated result was computed but never returned.
        return returnval
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl to outpath/outfile, or into memory when outfile is "-".

    When outfile is a real name the temp download is moved into place and a
    'Type': "File" dict is returned; when outfile is "-" the temp file is
    read back into memory and a 'Type': "Content" dict is returned. Returns
    False on failure. buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory.
            return False
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # Bug fix: the original dict listed 'Method' twice; the second key
        # (httpmethod) won, so only that one is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-" and sys.version[0] == "2"):
        returnval = _copy_request3_download_to_memory(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, StringIO())
    if(outfile == "-" and sys.version[0] >= "3"):
        returnval = _copy_request3_download_to_memory(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, BytesIO())
    # Bug fix: the computed result was never returned.
    return returnval

def _copy_request3_download_to_memory(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep, membuf):
    """Helper: download to a temp file, copy it into membuf, return a Content dict.

    membuf is a StringIO (py2) or BytesIO (py3) instance; returns False when
    the underlying download fails.
    """
    pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
    if(not pretmpfilename):
        return False
    tmpfilename = pretmpfilename['Filename']
    downloadsize = os.path.getsize(tmpfilename)
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    with open(tmpfilename, 'rb') as ft:
        while True:
            databytes = ft.read(buffersize[1])
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard: a zero-byte file would otherwise divide by zero.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            membuf.write(databytes)
    membuf.seek(0)
    fdata = membuf.getvalue()
    membuf.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
    # Bug fix: duplicate 'Method' key collapsed to the winning value
    # (httpmethod), matching the original runtime behavior.
    return {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        # NOTE(review): argument order (buffersize before outfile/outpath) is
        # kept exactly as the original call — confirm against the signature of
        # download_from_url_to_file_with_urllib.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        # Bug fix: the delegated result was computed but never returned.
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl via urllib3's urlopen and return body plus metadata.

    Returns a dict with keys 'Type', 'Content', 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code' and 'Reason', or False when the
    connection fails.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # Bug fix: the else-branch previously called .update on the
        # user-agent string instead of updating the header dict.
        httpheaders['User-Agent'] = httpuseragent
    if(httpreferer is not None):
        # Same fix as above for the Referer header.
        httpheaders['Referer'] = httpreferer
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            # Bug fix: the POST branch previously issued urlopen("GET", ...)
            # while still sending the body, so the request verb was wrong.
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # GET is also the fallback for any unrecognized method name.
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    # Bug fix: the result dict was built but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        # Bug fix: the delegated result was computed but never returned.
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl via urllib3's urlopen into a named temporary file.

    Returns a dict with 'Type': "File", the temporary 'Filename', sizes,
    timing information and response metadata, or False when the connection
    fails.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # Bug fix: previously the else-branch called .update on the
        # user-agent string itself instead of updating the header dict.
        httpheaders['User-Agent'] = httpuseragent
    if(httpreferer is not None):
        # Same fix as above for the Referer header.
        httpheaders['Referer'] = httpreferer
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            # Bug fix: the POST branch previously issued urlopen("GET", ...)
            # while still sending the body, so the request verb was wrong.
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # GET is also the fallback for any unrecognized method name.
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpcodereason = geturls_text.reason
    if(geturls_text.version == "10"):
        httpversionout = "1.0"
    else:
        httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # Bug fix: int() was applied before the None check, so a missing
    # Content-Length header raised TypeError instead of falling back to 0.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard: unknown/zero Content-Length would divide by zero.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    # Bug fix: the result dict was built but never returned.
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the urllib backend."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
        # Bug fix: the delegated result was computed but never returned.
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the urllib3 backend, either into outpath/outfile
    (returns a 'File' result dict) or, when outfile=="-", into memory
    (returns a 'Content' result dict). Returns False on failure.

    Fixes vs. original:
      * removed duplicate 'Method' key in the result dicts (the first value
        was silently discarded by the dict literal; kept httpmethod, the
        value that actually took effect);
      * the Python-3 in-memory branch now checks pretmpfilename for failure
        like the other branches (previously a failed download raised
        TypeError on pretmpfilename['Filename']).
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Download to a real file path, then move the temp file into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file;
        # parsedate_to_datetime is absent on old Pythons (AttributeError),
        # and either parser can fail on a malformed header (ValueError).
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-"):
        # Download to memory: copy the temp file into a StringIO/BytesIO
        # buffer and return the content itself.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3 (binary content).
        if(sys.version[0] == "2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the urllib
        implementation. Returns whatever the urllib variant returns.

        BUG FIX: arguments were previously passed positionally in the wrong
        order (buffersize where the urllib variant expects outfile); keyword
        arguments guarantee each value lands on the intended parameter.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with mechanize.Browser and return a 'Content' result
    dict (decompressing gzip/deflate/brotli bodies), or False on error.

    BUG FIX: when User-Agent / Referer were not already present in
    httpheaders, the original called .update() on the *string*
    httpuseragent (AttributeError) instead of adding the header to the
    httpheaders dict; the header was therefore never sent.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            # Unknown methods degrade to a plain GET, like the GET branch.
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # HTTP error responses still carry a usable body/headers.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpcodereason = http_status_to_reason(geturls_text.code)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    reqhead = geturls_opener.request
    httpheadersentout = reqhead.header_items()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 mechanize returns a mimetools.Message; copy it into a
        # plain dict so it behaves like the Python 3 result.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when mechanize is unavailable: delegate to the urllib
        implementation with the same arguments and return its result."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fetch httpurl with mechanize and stream the body into a uniquely named
    temporary file; return a 'File' result dict, or False on error.

    Fixes vs. original:
      * missing User-Agent / Referer headers are now added to httpheaders
        (the original called .update() on the httpuseragent *string*,
        raising AttributeError);
      * a missing Content-Length header no longer crashes: the original did
        int(None) before its None check ran.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from URL + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # was httpuseragent.update(...)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpcodereason = http_status_to_reason(geturls_text.code)
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    reqhead = geturls_opener.request
    httpheadersentout = reqhead.header_items()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # Content-Length may be absent; treat as 0 (unknown) instead of int(None).
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Carry the server Last-Modified over to the temp file when parseable.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when mechanize is unavailable: delegate the file download
        to the urllib implementation and return its result."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the mechanize backend, either into
    outpath/outfile ('File' result dict) or, when outfile=="-", into memory
    ('Content' result dict). Returns False on failure.

    Fixes vs. original:
      * the Python-3 branch's result dict had 'HeadersSent': ['HeadersSent']
        (a literal list) instead of the actual sent headers;
      * removed duplicate 'Method' keys (first value was discarded; kept
        httpmethod, the effective value);
      * the Python-3 in-memory branch now checks pretmpfilename for failure.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Download to a real file path, then move the temp file into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile == "-"):
        # Download to memory: copy the temp file into an in-memory buffer.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0] == "2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when mechanize is unavailable: delegate to the urllib
        implementation. Returns whatever the urllib variant returns.

        BUG FIX: arguments were previously passed positionally in the wrong
        order (buffersize where the urllib variant expects outfile); keyword
        arguments guarantee each value lands on the intended parameter.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_file_from_ftp_file(url):
    """Retrieve the file at an ftp:// or ftps:// URL into an in-memory
    BytesIO buffer (rewound to the start) and return it; False on failure.

    BUG FIX: the error-logging paths referenced the undefined name
    `httpurl` (the parameter is `url`), raising NameError whenever an
    error was actually hit.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    # Anonymous login when no credentials are embedded in the URL.
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        log.info("Error With URL "+url)  # was undefined `httpurl`
        return False
    except socket.timeout:
        log.info("Error With URL "+url)  # was undefined `httpurl`
        return False
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme == "ftps"):
        ftp.prot_p()  # secure the data connection on FTPS
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Fetch an FTP URL and return its contents as bytes (reads the whole
    BytesIO buffer produced by download_file_from_ftp_file)."""
    buffer_obj = download_file_from_ftp_file(url)
    return buffer_obj.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch an FTP URL into memory and return a 'Content' result dict
    shaped like the HTTP backends' results (header fields are None, since
    FTP has none), or False on failure.

    Fixes vs. original:
      * missing User-Agent / Referer were added via .update() on the
        httpuseragent *string* (AttributeError) instead of httpheaders;
      * result dict now includes 'Reason': None for parity with the other
        download_from_url_with_* backends.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # Headers are normalized for interface parity even though FTP ignores them.
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # was httpuseragent.update(...)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None, 'Reason': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fetch an FTP URL and stream it into a uniquely named temporary file;
    return a 'File' result dict, or False on failure.

    Fixes vs. original:
      * missing User-Agent / Referer were added via .update() on the
        httpuseragent *string* (AttributeError) instead of httpheaders;
      * result dict now includes 'Reason': None for parity with the other
        backends.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from URL + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # Headers are normalized for interface parity even though FTP ignores them.
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            httpheaders.update({'User-Agent': httpuseragent})  # was httpuseragent.update(...)
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            httpheaders.update({'Referer': httpreferer})  # was httpuseragent.update(...)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    # Size the in-memory buffer by seeking to its end, then rewind.
    geturls_text.seek(0, 2)
    downloadsize = geturls_text.tell()
    geturls_text.seek(0, 0)
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None, 'Reason': None}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
4759 def download_from_url_to_file_with_ftp(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
4760 global geturls_download_sleep
;
4762 sleep
= geturls_download_sleep
;
4763 if(not outfile
=="-"):
4764 outpath
= outpath
.rstrip(os
.path
.sep
);
4765 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
4766 if(not os
.path
.exists(outpath
)):
4767 os
.makedirs(outpath
);
4768 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
4770 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
4772 pretmpfilename
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
4773 if(not pretmpfilename
):
4775 tmpfilename
= pretmpfilename
['Filename'];
4776 downloadsize
= os
.path
.getsize(tmpfilename
);
4778 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
4779 exec_time_start
= time
.time();
4780 shutil
.move(tmpfilename
, filepath
);
4781 exec_time_end
= time
.time();
4782 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
4783 if(os
.path
.exists(tmpfilename
)):
4784 os
.remove(tmpfilename
);
4785 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': None, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code'], 'Reason': pretmpfilename
['Reason']};
4786 if(outfile
=="-" and sys
.version
[0]=="2"):
4787 pretmpfilename
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
4788 if(not pretmpfilename
):
4790 tmpfilename
= pretmpfilename
['Filename'];
4791 downloadsize
= os
.path
.getsize(tmpfilename
);
4794 exec_time_start
= time
.time();
4795 with
open(tmpfilename
, 'rb') as ft
:
4798 databytes
= ft
.read(buffersize
[1]);
4799 if not databytes
: break;
4800 datasize
= len(databytes
);
4801 fulldatasize
= datasize
+ fulldatasize
;
4804 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4805 downloaddiff
= fulldatasize
- prevdownsize
;
4806 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4807 prevdownsize
= fulldatasize
;
4810 fdata
= f
.getvalue();
4813 os
.remove(tmpfilename
);
4814 exec_time_end
= time
.time();
4815 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
4816 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': None, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code'], 'Reason': pretmpfilename
['Reason']};
4817 if(outfile
=="-" and sys
.version
[0]>="3"):
4818 pretmpfilename
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
4819 tmpfilename
= pretmpfilename
['Filename'];
4820 downloadsize
= os
.path
.getsize(tmpfilename
);
4823 exec_time_start
= time
.time();
4824 with
open(tmpfilename
, 'rb') as ft
:
4827 databytes
= ft
.read(buffersize
[1]);
4828 if not databytes
: break;
4829 datasize
= len(databytes
);
4830 fulldatasize
= datasize
+ fulldatasize
;
4833 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
4834 downloaddiff
= fulldatasize
- prevdownsize
;
4835 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
4836 prevdownsize
= fulldatasize
;
4839 fdata
= f
.getvalue();
4842 os
.remove(tmpfilename
);
4843 exec_time_end
= time
.time();
4844 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
4845 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': None, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code'], 'Reason': pretmpfilename
['Reason']};
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object *ftpfile* to an ftp:// or ftps:// URL.

    Returns the rewound file object on success, False on failure.
    NOTE(review): statements lost in the mangled source (else-branches,
    returns, close/seek) were reconstructed from this file's repeated pattern.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        # BUG FIX: without this branch ftp_password could be undefined
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUG FIX: original logged the undefined name 'httpurl'; the
        # parameter here is 'url'.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # BUG FIX: log in with the computed credentials so the anonymous
    # fallback above actually takes effect (original passed the raw,
    # possibly-None urlparts values).
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()  # switch the data channel to TLS
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string *ftpstring* to an FTP URL.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_ftp_file.
    Returns that function's result (file object or False).
    BUG FIX: the original dropped the result without returning it.
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Download an sftp:// URL via paramiko and return its contents as a
    rewound BytesIO object, or False on any failure.

    NOTE(review): missing statements (else-branches, returns, close calls)
    reconstructed from this file's repeated pattern.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUG FIX: original logged the undefined name 'httpurl'
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_sftp_file(url):
    """Fallback stub used when paramiko is unavailable: SFTP downloads
    are unsupported, so always return False."""
    return False
def download_file_from_sftp_string(url):
    """Return the contents of an sftp:// URL as bytes, or False.

    BUG FIX: the original called .read() unconditionally, raising
    AttributeError when download_file_from_sftp_file returned False.
    """
    sftpfile = download_file_from_sftp_file(url)
    if(not sftpfile):
        return False
    return sftpfile.read()
def download_file_from_ftp_string(url):
    """Fallback stub (paramiko unavailable); always returns False.

    NOTE(review): the name says 'ftp' although it sits in the sftp fallback
    section — kept as-is since renaming could shadow the real FTP helper.
    """
    return False
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch an sftp:// URL fully into memory.

    Returns a result dict (Type/Content/Headers/.../URL/Code) or False on
    failure. Headers are normalized but unused by the SFTP transport itself.
    NOTE(review): sleep guard and returns reconstructed from the file's
    repeated pattern.
    """
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called .update() on the httpuseragent
            # string instead of the header dict
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above, for the Referer header
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback stub used when paramiko is unavailable; always False."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download an sftp:// URL to a uniquely-named temporary file.

    Returns a dict describing the temp file (Type/Filename/Filesize/
    DownloadTime/...) or False on failure.
    NOTE(review): missing statements (sleep guard, counters, while loop,
    f.write, returns) reconstructed from the file's repeated pattern.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: original called .update() on the httpuseragent string
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same as above, for the Referer header
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    # Size the in-memory download by seeking to the end of the BytesIO.
    geturls_text.seek(0, 2)
    downloadsize = geturls_text.tell()
    geturls_text.seek(0, 0)
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback stub used when paramiko is unavailable; always False."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download an sftp:// URL either to a file under *outpath* (when
    outfile != "-") or into memory (outfile == "-").

    Returns a result dict or False on failure.
    NOTE(review): statements lost in the mangled source were reconstructed
    from this file's repeated pattern; the original's identical Python-2 and
    Python-3 in-memory branches were merged.
    """
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the original dict listed 'Method' twice ('Method':
        # pretmpfilename['Method'], then 'Method': None), so the real
        # method was always clobbered; the duplicate is dropped.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the original Python-3 branch skipped this failure check
        if(not pretmpfilename):
            return False
        returnval = _sftp_content_from_tmpfile(pretmpfilename, buffersize[1])
    return returnval


def _sftp_content_from_tmpfile(pretmpfilename, copybuffersize):
    """Copy the downloaded temp file into memory with progress logging,
    delete the temp file, and build the "Content" result dict."""
    tmpfilename = pretmpfilename['Filename']
    downloadsize = os.path.getsize(tmpfilename)
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    f = BytesIO()
    with open(tmpfilename, 'rb') as ft:
        while True:
            databytes = ft.read(copybuffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    fdata = f.getvalue()
    f.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
    # 'Method' duplicate removed here as well (see caller).
    return {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback stub used when paramiko is unavailable; always False."""
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object *sftpfile* to an sftp:// URL via paramiko.

    Returns the rewound file object on success, False on failure.
    NOTE(review): missing statements reconstructed from the file's pattern.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUG FIX: original logged the undefined name 'httpurl'
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback stub used when paramiko is unavailable; always False."""
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the byte string *sftpstring* to an sftp:// URL.

    BUG FIX: the original called the nonexistent 'upload_file_to_sftp_files'
    and passed the undefined name 'ftpfileo'; both corrected, and the result
    is now returned.
    """
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
def upload_file_to_sftp_string(sftpstring, url):
    """Fallback stub used when paramiko is unavailable; always False.

    BUG FIX: the original stub took only (url), so callers using the real
    two-argument signature would raise TypeError; signatures now match.
    """
    return False
def download_file_from_pysftp_file(url):
    """Download an sftp:// URL via pysftp and return its contents as a
    rewound BytesIO object, or False on any failure.

    NOTE(review): missing statements reconstructed from the file's pattern.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    try:
        # BUG FIX: the original discarded the pysftp.Connection and then
        # called ssh.open_sftp() on the undefined name 'ssh'; keep the
        # connection and use it directly (pysftp.Connection has getfo).
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUG FIX: original logged the undefined name 'httpurl'
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_pysftp_file(url):
    """Fallback stub used when pysftp is unavailable; always False."""
    return False
def download_file_from_pysftp_string(url):
    """Return the contents of an sftp:// URL (via pysftp) as bytes, or False.

    BUG FIX: the original called .read() unconditionally, raising
    AttributeError when download_file_from_pysftp_file returned False.
    """
    sftpfile = download_file_from_pysftp_file(url)
    if(not sftpfile):
        return False
    return sftpfile.read()
def download_file_from_ftp_string(url):
    """Fallback stub (pysftp unavailable); always returns False.

    NOTE(review): the name says 'ftp' although it sits in the pysftp
    fallback section — kept as-is since renaming could shadow other helpers.
    """
    return False
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch an sftp:// URL fully into memory via pysftp.

    Returns a result dict (Type/Content/Headers/.../URL/Code) or False.
    NOTE(review): sleep guard and returns reconstructed from the file's
    repeated pattern.
    """
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_pysftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fallback stub used when pysftp is unavailable; always False."""
    return False
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download an sftp:// URL (via pysftp) to a uniquely-named temp file.

    Returns a dict describing the temp file or False on failure.
    NOTE(review): missing statements (sleep guard, counters, while loop,
    f.write, returns) reconstructed from the file's repeated pattern.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL + buffersize + start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_pysftp_file(httpurl)
    if(not geturls_text):
        return False
    # Size the in-memory download by seeking to the end of the BytesIO.
    geturls_text.seek(0, 2)
    downloadsize = geturls_text.tell()
    geturls_text.seek(0, 0)
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fallback stub used when pysftp is unavailable; always False."""
    return False
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download an sftp:// URL via pysftp either to a file under *outpath*
    (outfile != "-") or into memory (outfile == "-").

    Returns a result dict or False on failure.
    NOTE(review): statements lost in the mangled source were reconstructed
    from this file's repeated pattern; the original's identical Python-2 and
    Python-3 in-memory branches were merged.
    """
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the original dict listed 'Method' twice, so the real
        # method value was always clobbered by None; duplicate dropped.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the original Python-3 branch skipped this failure check
        if(not pretmpfilename):
            return False
        returnval = _pysftp_content_from_tmpfile(pretmpfilename, buffersize[1])
    return returnval


def _pysftp_content_from_tmpfile(pretmpfilename, copybuffersize):
    """Copy the downloaded temp file into memory with progress logging,
    delete the temp file, and build the "Content" result dict."""
    tmpfilename = pretmpfilename['Filename']
    downloadsize = os.path.getsize(tmpfilename)
    fulldatasize = 0
    prevdownsize = 0
    exec_time_start = time.time()
    f = BytesIO()
    with open(tmpfilename, 'rb') as ft:
        while True:
            databytes = ft.read(copybuffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    fdata = f.getvalue()
    f.close()
    os.remove(tmpfilename)
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
    return {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']}
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback stub used when pysftp is unavailable; always False."""
    return False
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object *sftpfile* to an sftp:// URL via pysftp.

    Returns the rewound file object on success, False on failure.
    NOTE(review): missing statements reconstructed from the file's pattern.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL gives none
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    try:
        # BUG FIX: the original discarded the pysftp.Connection and then
        # used the undefined name 'ssh'; keep the connection and use it.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUG FIX: original logged the undefined name 'httpurl'
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_pysftp_file(sftpfile, url):
    """Fallback stub used when pysftp is unavailable; always False."""
    return False
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the byte string *sftpstring* to an sftp:// URL via pysftp.

    BUG FIX: the original called the nonexistent 'upload_file_to_pysftp_files'
    and passed the undefined name 'ftpfileo'; both corrected, and the result
    is now returned.
    """
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
5493 def upload_file_to_pysftp_string(url
):