4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/20/2023 Ver. 1.2.12 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
55 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
83 if(sys
.version
[0]=="2"):
85 from cStringIO
import StringIO
;
87 from StringIO
import StringIO
;
88 # From http://python-future.org/compatible_idioms.html
89 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
90 from urllib
import urlencode
;
91 from urllib
import urlopen
as urlopenalt
;
92 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
93 import urlparse
, cookielib
;
94 from httplib
import HTTPConnection
, HTTPSConnection
;
95 if(sys
.version
[0]>="3"):
96 from io
import StringIO
, BytesIO
;
97 # From http://python-future.org/compatible_idioms.html
98 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
99 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
100 from urllib
.error
import HTTPError
, URLError
;
101 import urllib
.parse
as urlparse
;
102 import http
.cookiejar
as cookielib
;
103 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Project identity and version metadata for PyWWW-Get.
__program_name__ = "PyWWW-Get"
__program_alt_name__ = "PyWWWGet"
__program_small_name__ = "wwwget"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, patch, pre-release tag or None, pre-release number or None)
__version_info__ = (1, 2, 12, "RC 1", 1)
# (year, month, day, pre-release tag or None, pre-release number or None)
__version_date_info__ = (2023, 9, 20, "RC 1", 1)
# Release date rendered as "YYYY.MM.DD" with zero-padded month and day.
__version_date__ = "{0}.{1}.{2}".format(__version_date_info__[0], str(__version_date_info__[1]).zfill(2), str(__version_date_info__[2]).zfill(2))
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"
# Date string with the pre-release number appended when one is set.
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__
else:
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4])
# Human-readable version, e.g. "1.2.12 RC 1" (tag appended only when set).
if(__version_info__[3] is None):
    __version__ = ".".join(str(vpart) for vpart in __version_info__[0:3])
else:
    __version__ = ".".join(str(vpart) for vpart in __version_info__[0:3])+" "+str(__version_info__[3])
# Prefix for temporary download files, e.g. "py3wwwget1-"
# (python major version + small program name + program major version).
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
# System temporary directory used for those files.
pytempdir = tempfile.gettempdir();
# Bitness of the running interpreter as a string, e.g. "32bit" or "64bit".
# Fix: platform.architecture() returns a (bits, linkage) TUPLE, so the
# original assignment made the PyBitness=="32bit"/"64bit" comparisons just
# below always false and put the tuple's repr into the SEC-CH-UA-BITNESS
# header; take element [0] (the bits string) instead.
PyBitness = platform.architecture()[0]
129 if(PyBitness
=="32bit" or PyBitness
=="32"):
131 elif(PyBitness
=="64bit" or PyBitness
=="64"):
136 compression_supported
= "gzip, deflate";
138 compression_supported
= "gzip, deflate, br";
140 compression_supported
= "gzip, deflate";
# Shared cookie jar used as the default cookie store by the download helpers.
# NOTE(review): cookielib is aliased to http.cookiejar on Python 3 in the
# version-specific import block above.
geturls_cj = cookielib.CookieJar();
# Platform fragments for User-Agent strings plus Sec-CH-UA client-hint header
# fragments for each Windows version we can impersonate.
# Fix: every *_ua_addon dict previously listed 'SEC-CH-UA-PLATFORM' twice, so
# the literal "Windows" value was silently overwritten by the version number
# (duplicate keys in a dict literal keep only the last value).  The version
# number is the platform *version* hint and belongs under
# 'SEC-CH-UA-PLATFORM-VERSION'.
windowsNT4_ua_string = "Windows NT 4.0"
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"}
windows2k_ua_string = "Windows NT 5.0"
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"}
windowsXP_ua_string = "Windows NT 5.1"
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"}
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64"
# NOTE(review): "5.1.0" here looks like a copy-paste slip (NT 5.2 is XP x64,
# so "5.2.0" seems intended) -- kept as-is, confirm before changing.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"}
windows7_ua_string = "Windows NT 6.1; Win64; x64"
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"}
windows8_ua_string = "Windows NT 6.2; Win64; x64"
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"}
windows81_ua_string = "Windows NT 6.3; Win64; x64"
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"}
windows10_ua_string = "Windows NT 10.0; Win64; x64"
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"}
windows11_ua_string = "Windows NT 11.0; Win64; x64"
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"}
# Full User-Agent strings, all impersonating the Windows 7 platform string
# defined above, plus PyWWW-Get's own and Googlebot's UA strings.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ({0}; rv:109.0) Gecko/20100101 Firefox/117.0".format(windows7_ua_string)
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ({0}; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17".format(windows7_ua_string)
geturls_ua_chrome_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36".format(windows7_ua_string)
geturls_ua_chromium_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36".format(windows7_ua_string)
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ({0}; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1".format(windows7_ua_string)
geturls_ua_opera_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0".format(windows7_ua_string)
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48".format(windows7_ua_string)
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ({0}; Trident/7.0; rv:11.0) like Gecko".format(windows7_ua_string)
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ({0}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31".format(windows7_ua_string)
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {0}/{1}; +{2})".format(__project__, __version__, __project_url__)
# Interpreter implementation name ("CPython", "PyPy", ...); fall back to
# "Python" when platform reports an empty string.
py_implementation = platform.python_implementation()
if(py_implementation==""):
    py_implementation = "Python"
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({0}; {1}; +{2}) {3}/{4} (KHTML, like Gecko) {5}/{6}".format(platform.system()+" "+platform.release(), platform.machine(), __project_url__, py_implementation, platform.python_version(), __project__, __version__)
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
# Default User-Agent used by the header sets below.
geturls_ua = geturls_ua_firefox_windows7
# Ready-made request-header dicts for each impersonated browser.  Each dict
# pairs the matching User-Agent with common Accept/Connection headers; the
# Chromium-based entries additionally send Sec-CH-UA client hints and are
# then extended with the Windows 7 platform hints via .update().
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# NOTE(review): this one assignment lacks the trailing semicolon every other
# statement here uses (harmless in Python; kept byte-identical).
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Header sets advertising PyWWW-Get itself (plain and "alt" User-Agent).
# Fix: 'SEC-CH-UA-PLATFORM' appeared twice in each dict literal, so the
# implementation name was silently overwritten by the version string; the
# version belongs under 'SEC-CH-UA-PLATFORM-VERSION'.  Also dropped the
# no-op ""+x+"" concatenations around already-string values.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)}
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)}
# Header sets impersonating Google's crawler (current and legacy UA forms).
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Default header set used by the download helpers.
geturls_headers = geturls_headers_firefox_windows7;
# Module-wide default sleep between downloads; presumably substituted when a
# caller passes sleep=-1 to download_from_url* -- confirm (the guarding
# condition is outside this view).
geturls_download_sleep = 0;
200 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
201 if(outtype
=="print" and dbgenable
):
204 elif(outtype
=="log" and dbgenable
):
205 logging
.info(dbgtxt
);
207 elif(outtype
=="warning" and dbgenable
):
208 logging
.warning(dbgtxt
);
210 elif(outtype
=="error" and dbgenable
):
211 logging
.error(dbgtxt
);
213 elif(outtype
=="critical" and dbgenable
):
214 logging
.critical(dbgtxt
);
216 elif(outtype
=="exception" and dbgenable
):
217 logging
.exception(dbgtxt
);
219 elif(outtype
=="logalt" and dbgenable
):
220 logging
.log(dgblevel
, dbgtxt
);
222 elif(outtype
=="debug" and dbgenable
):
223 logging
.debug(dbgtxt
);
231 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
232 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
237 def add_url_param(url
, **params
):
239 parts
= list(urlparse
.urlsplit(url
));
240 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
242 parts
[n
]=urlencode(d
);
243 return urlparse
.urlunsplit(parts
);
# Make the directory containing this script and the current working directory
# discoverable through PATH (consumed by which_exec just below).
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search the directories in the PATH environment variable for execfile.

    Returns the full path of the first match, or None when nothing matches.
    Fix: PATH was split on a literal ":" and joined with "/", which breaks on
    Windows (pathsep is ";", sep is "\\"); use os.pathsep/os.path.join,
    matching the os.pathsep used when PATH is extended just above.
    """
    for directory in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(directory, execfile)
        if os.path.exists(candidate):
            return candidate
    return None  # explicit: original fell off the end, returning None
251 def listize(varlist
):
259 newlistreg
.update({ilx
: varlist
[il
]});
260 newlistrev
.update({varlist
[il
]: ilx
});
263 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
266 def twolistize(varlist
):
276 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
277 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
278 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
279 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
282 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
283 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
284 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
287 def arglistize(proexec
, *varlist
):
291 newarglist
= [proexec
];
293 if varlist
[il
][0] is not None:
294 newarglist
.append(varlist
[il
][0]);
295 if varlist
[il
][1] is not None:
296 newarglist
.append(varlist
[il
][1]);
300 def fix_header_names(header_dict
):
301 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
304 # hms_string by ArcGIS Python Recipes
305 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Render an elapsed time in seconds as "H:MM:SS.ss".

    Hours are unpadded; minutes are zero-padded to 2 digits and seconds to
    5 characters with two decimals.
    """
    hours = int(sec_elapsed / (60 * 60))
    leftover = sec_elapsed % (60 * 60)
    minutes = int(leftover / 60)
    seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
312 # get_readable_size by Lipis
313 # http://stackoverflow.com/posts/14998888/revisions
314 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
316 if(unit
!="IEC" and unit
!="SI"):
319 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
320 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
323 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
324 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
329 if abs(bytes
) < unitsize
:
330 strformat
= "%3."+str(precision
)+"f%s";
331 pre_return_val
= (strformat
% (bytes
, unit
));
332 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
333 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
334 alt_return_val
= pre_return_val
.split();
335 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
338 strformat
= "%."+str(precision
)+"f%s";
339 pre_return_val
= (strformat
% (bytes
, "YiB"));
340 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
341 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
342 alt_return_val
= pre_return_val
.split();
343 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
346 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
348 usehashtypes
= usehashtypes
.lower();
349 getfilesize
= os
.path
.getsize(infile
);
350 return_val
= get_readable_size(getfilesize
, precision
, unit
);
352 hashtypelist
= usehashtypes
.split(",");
353 openfile
= open(infile
, "rb");
354 filecontents
= openfile
.read();
357 listnumend
= len(hashtypelist
);
358 while(listnumcount
< listnumend
):
359 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
360 hashtypelistup
= hashtypelistlow
.upper();
361 filehash
= hashlib
.new(hashtypelistup
);
362 filehash
.update(filecontents
);
363 filegethash
= filehash
.hexdigest();
364 return_val
.update({hashtypelistup
: filegethash
});
368 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
370 usehashtypes
= usehashtypes
.lower();
371 getfilesize
= len(instring
);
372 return_val
= get_readable_size(getfilesize
, precision
, unit
);
374 hashtypelist
= usehashtypes
.split(",");
376 listnumend
= len(hashtypelist
);
377 while(listnumcount
< listnumend
):
378 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
379 hashtypelistup
= hashtypelistlow
.upper();
380 filehash
= hashlib
.new(hashtypelistup
);
381 if(sys
.version
[0]=="2"):
382 filehash
.update(instring
);
383 if(sys
.version
[0]>="3"):
384 filehash
.update(instring
.encode('utf-8'));
385 filegethash
= filehash
.hexdigest();
386 return_val
.update({hashtypelistup
: filegethash
});
390 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
391 if isinstance(headers
, dict):
393 if(sys
.version
[0]=="2"):
394 for headkey
, headvalue
in headers
.iteritems():
395 returnval
.append((headkey
, headvalue
));
396 if(sys
.version
[0]>="3"):
397 for headkey
, headvalue
in headers
.items():
398 returnval
.append((headkey
, headvalue
));
399 elif isinstance(headers
, list):
405 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
406 if isinstance(headers
, dict):
408 if(sys
.version
[0]=="2"):
409 for headkey
, headvalue
in headers
.iteritems():
410 returnval
.append(headkey
+": "+headvalue
);
411 if(sys
.version
[0]>="3"):
412 for headkey
, headvalue
in headers
.items():
413 returnval
.append(headkey
+": "+headvalue
);
414 elif isinstance(headers
, list):
420 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
421 if isinstance(headers
, list):
426 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
428 elif isinstance(headers
, dict):
434 def get_httplib_support(checkvalue
=None):
435 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
437 returnval
.append("ftp");
438 returnval
.append("httplib");
440 returnval
.append("httplib2");
441 returnval
.append("urllib");
443 returnval
.append("urllib3");
444 returnval
.append("request3");
445 returnval
.append("request");
447 returnval
.append("requests");
449 returnval
.append("httpx");
450 returnval
.append("httpx2");
452 returnval
.append("mechanize");
454 returnval
.append("sftp");
456 returnval
.append("pysftp");
457 if(not checkvalue
is None):
458 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
459 checkvalue
= "urllib";
460 if(checkvalue
=="httplib1"):
461 checkvalue
= "httplib";
462 if(checkvalue
in returnval
):
468 def check_httplib_support(checkvalue
="urllib"):
469 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
470 checkvalue
= "urllib";
471 if(checkvalue
=="httplib1"):
472 checkvalue
= "httplib";
473 returnval
= get_httplib_support(checkvalue
);
476 def get_httplib_support_list():
477 returnval
= get_httplib_support(None);
480 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
481 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
483 sleep
= geturls_download_sleep
;
484 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
485 httplibuse
= "urllib";
486 if(httplibuse
=="httplib1"):
487 httplibuse
= "httplib";
488 if(not haverequests
and httplibuse
=="requests"):
489 httplibuse
= "urllib";
490 if(not havehttpx
and httplibuse
=="httpx"):
491 httplibuse
= "urllib";
492 if(not havehttpx
and httplibuse
=="httpx2"):
493 httplibuse
= "urllib";
494 if(not havehttpcore
and httplibuse
=="httpcore"):
495 httplibuse
= "urllib";
496 if(not havehttpcore
and httplibuse
=="httpcore2"):
497 httplibuse
= "urllib";
498 if(not havemechanize
and httplibuse
=="mechanize"):
499 httplibuse
= "urllib";
500 if(not havehttplib2
and httplibuse
=="httplib2"):
501 httplibuse
= "httplib";
502 if(not haveparamiko
and httplibuse
=="sftp"):
504 if(not havepysftp
and httplibuse
=="pysftp"):
506 if(httplibuse
=="urllib"):
507 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
508 elif(httplibuse
=="request"):
509 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
510 elif(httplibuse
=="request3"):
511 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
512 elif(httplibuse
=="httplib"):
513 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
514 elif(httplibuse
=="httplib2"):
515 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
516 elif(httplibuse
=="urllib3"):
517 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
518 elif(httplibuse
=="requests"):
519 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
520 elif(httplibuse
=="httpx"):
521 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
522 elif(httplibuse
=="httpx2"):
523 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
524 elif(httplibuse
=="httpcore"):
525 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
526 elif(httplibuse
=="httpcore2"):
527 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
528 elif(httplibuse
=="mechanize"):
529 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
530 elif(httplibuse
=="ftp"):
531 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
532 elif(httplibuse
=="sftp"):
533 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
534 elif(httplibuse
=="pysftp"):
535 returnval
= download_from_url_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, sleep
);
540 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1):
541 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
543 sleep
= geturls_download_sleep
;
544 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
545 httplibuse
= "urllib";
546 if(httplibuse
=="httplib1"):
547 httplibuse
= "httplib";
548 if(not haverequests
and httplibuse
=="requests"):
549 httplibuse
= "urllib";
550 if(not havehttpx
and httplibuse
=="httpx"):
551 httplibuse
= "urllib";
552 if(not havehttpx
and httplibuse
=="httpx2"):
553 httplibuse
= "urllib";
554 if(not havehttpcore
and httplibuse
=="httpcore"):
555 httplibuse
= "urllib";
556 if(not havehttpcore
and httplibuse
=="httpcore2"):
557 httplibuse
= "urllib";
558 if(not havemechanize
and httplibuse
=="mechanize"):
559 httplibuse
= "urllib";
560 if(not havehttplib2
and httplibuse
=="httplib2"):
561 httplibuse
= "httplib";
562 if(not haveparamiko
and httplibuse
=="sftp"):
564 if(not haveparamiko
and httplibuse
=="pysftp"):
566 if(httplibuse
=="urllib"):
567 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
568 elif(httplibuse
=="request"):
569 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
570 elif(httplibuse
=="request3"):
571 returnval
= download_from_url_file_with_request3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
572 elif(httplibuse
=="httplib"):
573 returnval
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
574 elif(httplibuse
=="httplib2"):
575 returnval
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
576 elif(httplibuse
=="urllib3"):
577 returnval
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
578 elif(httplibuse
=="requests"):
579 returnval
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
580 elif(httplibuse
=="httpx"):
581 returnval
= download_from_url_file_with_httpx(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
582 elif(httplibuse
=="httpx2"):
583 returnval
= download_from_url_file_with_httpx2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
584 elif(httplibuse
=="httpcore"):
585 returnval
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
586 elif(httplibuse
=="httpcore2"):
587 returnval
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
588 elif(httplibuse
=="mechanize"):
589 returnval
= download_from_url_file_with_mechanize(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
590 elif(httplibuse
=="ftp"):
591 returnval
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
592 elif(httplibuse
=="sftp"):
593 returnval
= download_from_url_file_with_sftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
594 elif(httplibuse
=="pysftp"):
595 returnval
= download_from_url_file_with_pysftp(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl to a file (or into memory when outfile == "-") using the
    HTTP backend named by httplibuse, silently falling back to a backend that
    is actually installed.

    Returns whatever the selected download_from_url_to_file_with_* helper
    returns (a metadata dict on success, False on failure), or False when the
    backend name is unrecognized.
    """
    # FIX: the global statement previously declared a misspelled
    # "havehttpcorei" while the body reads "havehttpcore".
    global geturls_download_sleep, haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize backend aliases to their canonical names.
    if(httplibuse == "urllib1" or httplibuse == "urllib2"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to an always-available backend when the requested one is
    # not importable on this system.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    # CONSISTENCY: haveurllib3 was declared global but never consulted;
    # add the same fallback the other optional backends get.
    if(not haveurllib3 and httplibuse == "request3"):
        httplibuse = "urllib"
    if(not haveurllib3 and httplibuse == "urllib3"):
        httplibuse = "urllib"
    if(not haveparamiko and httplibuse == "sftp"):
        httplibuse = "ftp"
    if(not havepysftp and httplibuse == "pysftp"):
        httplibuse = "ftp"
    # Dispatch to the matching backend helper.
    if(httplibuse == "urllib"):
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "request"):
        returnval = download_from_url_to_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "request3"):
        returnval = download_from_url_to_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "httplib"):
        returnval = download_from_url_to_file_with_httplib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "httplib2"):
        returnval = download_from_url_to_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "urllib3"):
        returnval = download_from_url_to_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "requests"):
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "httpx"):
        # NOTE(review): the httpx/httpcore helpers are called without
        # outfile/outpath, matching the original call shape — confirm their
        # signatures elsewhere in the file.
        returnval = download_from_url_to_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif(httplibuse == "httpx2"):
        returnval = download_from_url_to_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif(httplibuse == "httpcore"):
        returnval = download_from_url_to_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif(httplibuse == "httpcore2"):
        returnval = download_from_url_to_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    elif(httplibuse == "mechanize"):
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "ftp"):
        returnval = download_from_url_to_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "sftp"):
        returnval = download_from_url_to_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    elif(httplibuse == "pysftp"):
        returnval = download_from_url_to_file_with_pysftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
    else:
        # ROBUSTNESS: an unknown backend previously left returnval unbound,
        # raising NameError at the return below.
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with urllib and return the body in memory.

    Returns a dict {'Type': "Content", 'Content': bytes, 'Headers': ...,
    'Version': ..., 'Method': ..., 'HeadersSent': ..., 'URL': ...,
    'Code': ...}, or False on URLError/timeout.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: was httpuseragent.update(...) — a str has no .update(),
            # so this branch raised AttributeError; the headers dict is the
            # intended target.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: same wrong-object bug as above (was httpuseragent.update).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from userinfo embedded in the URL.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # An HTTPError is itself a response object; keep it so status code
        # and headers can still be reported below.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    httpheaderout = fix_header_names(httpheaderout)
    if(sys.version[0] == "2"):
        # Python 2 message objects need an explicit copy into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    httpencoding = httpheaderout.get("Content-Encoding")
    if(httpencoding == "gzip" or httpencoding == "deflate"):
        # NOTE(review): "deflate" is decoded via GzipFile too, which only
        # handles gzip-framed streams — confirm against real servers.
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif(httpencoding == "br" and havebrotli):
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # ROBUSTNESS: previously a "br" body without brotli installed left
        # returnval_content unbound (NameError); fall back to the raw body.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Download httpurl with urllib into a uniquely-named temporary file.

    Streams the body in buffersize chunks, logging progress, and returns a
    dict {'Type': "File", 'Filename': tmpfile, 'Filesize': ..., 'Headers':
    ..., 'Code': ...}, or False on URLError/timeout. The caller is
    responsible for removing the temp file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: was httpuseragent.update(...) — AttributeError on str;
            # the headers dict is the intended target.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: same wrong-object bug (was httpuseragent.update).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from userinfo embedded in the URL.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # HTTPError is also a response object; keep it so status/headers work.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        # FIX: removed a second, unreachable duplicate
        # "except socket.timeout" handler that followed this one.
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 message objects need an explicit copy into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified time onto the temp file.
        # ROBUSTNESS: guard on the header being present — parsedate of None
        # raised an uncaught TypeError before.
        if(httpheaderout.get('Last-Modified') is not None):
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
            except (AttributeError, ValueError):
                # Python 2 has no parsedate_to_datetime; fall back to strptime.
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl with urllib, then either move the temp file into
    outpath/outfile (returning a 'File' dict) or, when outfile == "-",
    read it back into memory (returning a 'Content' dict).

    buffersize is [download_chunk, copy_chunk]. Returns False when the
    destination is invalid or the download failed.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        # Refuse to write when outpath is a file or filepath is a directory.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified time on the final file.
        if(pretmpfilename.get('Headers').get('Last-Modified') is not None):
            try:
                os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
            except (AttributeError, ValueError):
                try:
                    os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
        exec_time_end = time.time()
        # FIX: durations were computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # FIX: the result dict previously listed 'Method' twice; only the
        # final httpmethod value ever survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] >= "3"):
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # CONSISTENCY: the py3 branch was missing the failure guard the
        # other branches have, so a failed download raised TypeError.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with http.client (httplib) and return the body in memory.

    Returns a dict {'Type': "Content", 'Content': bytes, 'Headers': ...,
    'Code': ...}, or False for non-http(s) schemes or connection errors.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: was httpuseragent.update(...) — AttributeError on str.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: same wrong-object bug (was httpuseragent.update).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from userinfo embedded in the URL.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): this opener is built but the request below goes through
    # httpconn, not the opener — kept only for parity with the original.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # NOTE(review): only the path component (urlparts[2]) is sent; any
        # query string in the URL is dropped — confirm this is intended.
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # FIX: the POST branch previously issued a "GET" request verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 message objects need an explicit copy into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    httpencoding = httpheaderout.get("Content-Encoding")
    if(httpencoding == "gzip" or httpencoding == "deflate"):
        # NOTE(review): "deflate" decoded via GzipFile only works for
        # gzip-framed streams — confirm against real servers.
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif(httpencoding == "br" and havebrotli):
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # ROBUSTNESS: previously "br" without brotli left returnval_content
        # unbound (NameError); fall back to the raw body.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Download httpurl with http.client (httplib) into a uniquely-named
    temporary file, streaming in buffersize chunks with progress logging.

    Returns a dict {'Type': "File", 'Filename': tmpfile, 'Filesize': ...,
    'Headers': ..., 'Code': ...}, or False for non-http(s) schemes or
    connection errors. The caller removes the temp file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: was httpuseragent.update(...) — AttributeError on str.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: same wrong-object bug (was httpuseragent.update).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # HTTP Basic auth from userinfo embedded in the URL.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): opener built for parity with the original; the request
    # below goes through httpconn, not the opener.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            # FIX: the POST branch previously issued a "GET" request verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2 message objects need an explicit copy into a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Mirror the server's Last-Modified time onto the temp file.
        if(httpheaderout.get('Last-Modified') is not None):
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
            except (AttributeError, ValueError):
                # Python 2 has no parsedate_to_datetime; fall back to strptime.
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except ValueError:
                    pass
        # FIX: the result dict previously also contained a stray duplicate
        # 'Type': "Content", 'Content': returnval_content pair copied from
        # the in-memory variant; returnval_content is undefined here and the
        # duplicate 'Type' key silently overrode "File" — both removed.
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
1167 def download_from_url_to_file_with_httplib(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
1168 global geturls_download_sleep
;
1170 sleep
= geturls_download_sleep
;
1171 if(not outfile
=="-"):
1172 outpath
= outpath
.rstrip(os
.path
.sep
);
1173 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
1174 if(not os
.path
.exists(outpath
)):
1175 os
.makedirs(outpath
);
1176 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
1178 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
1180 pretmpfilename
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
1181 if(not pretmpfilename
):
1183 tmpfilename
= pretmpfilename
['Filename'];
1184 downloadsize
= os
.path
.getsize(tmpfilename
);
1186 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
1187 exec_time_start
= time
.time();
1188 shutil
.move(tmpfilename
, filepath
);
1190 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1191 except AttributeError:
1193 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1198 exec_time_end
= time
.time();
1199 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
1200 if(os
.path
.exists(tmpfilename
)):
1201 os
.remove(tmpfilename
);
1202 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': httpmethod
, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
1203 if(outfile
=="-" and sys
.version
[0]=="2"):
1204 pretmpfilename
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
1205 if(not pretmpfilename
):
1207 tmpfilename
= pretmpfilename
['Filename'];
1208 downloadsize
= os
.path
.getsize(tmpfilename
);
1211 exec_time_start
= time
.time();
1212 with
open(tmpfilename
, 'rb') as ft
:
1215 databytes
= ft
.read(buffersize
[1]);
1216 if not databytes
: break;
1217 datasize
= len(databytes
);
1218 fulldatasize
= datasize
+ fulldatasize
;
1221 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1222 downloaddiff
= fulldatasize
- prevdownsize
;
1223 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1224 prevdownsize
= fulldatasize
;
1227 fdata
= f
.getvalue();
1230 os
.remove(tmpfilename
);
1231 exec_time_end
= time
.time();
1232 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1233 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': httpmethod
, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
1234 if(outfile
=="-" and sys
.version
[0]>="3"):
1235 pretmpfilename
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
1236 tmpfilename
= pretmpfilename
['Filename'];
1237 downloadsize
= os
.path
.getsize(tmpfilename
);
1240 exec_time_start
= time
.time();
1241 with
open(tmpfilename
, 'rb') as ft
:
1244 databytes
= ft
.read(buffersize
[1]);
1245 if not databytes
: break;
1246 datasize
= len(databytes
);
1247 fulldatasize
= datasize
+ fulldatasize
;
1250 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1251 downloaddiff
= fulldatasize
- prevdownsize
;
1252 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1253 prevdownsize
= fulldatasize
;
1256 fdata
= f
.getvalue();
1259 os
.remove(tmpfilename
);
1260 exec_time_end
= time
.time();
1261 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1262 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': httpmethod
, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download httpurl using httplib2's timeout-aware connections.

    Returns a dict {'Type': "Content", 'Content': body, 'Headers': ...,
    'Version': ..., 'Method': ..., 'HeadersSent': ..., 'URL': ..., 'Code': ...}
    or False when the scheme is unsupported or the connection fails."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fixed: previously updated httpuseragent (a string) instead of httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Send credentials embedded in the URL as HTTP Basic auth.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1]);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # Fixed: the POST branch previously issued a "GET" request.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # NOTE(review): rebuilds the header mapping key-by-key on Python 2 —
        # presumably to coerce message objects into a plain dict; confirm.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.read()[:];
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when httplib2 is unavailable; delegates to the urllib implementation."""
        # Fixed: the delegated result was computed but never returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httplib2 into a uniquely named temporary file.

    The temp file name is salted with a SHA-1 of the URL, buffer size and start
    time. Returns a dict with 'Type': "File", the temp 'Filename', 'Filesize',
    download timing and response metadata; False on error."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fixed: previously updated httpuseragent (a string) instead of httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Send credentials embedded in the URL as HTTP Basic auth.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1]);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # Fixed: the POST branch previously issued a "GET" request.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.gaierror:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = httpurl;
    httpheaderout = geturls_text.getheaders();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # NOTE(review): rebuilds the header mapping key-by-key on Python 2 —
        # presumably to coerce message objects into a plain dict; confirm.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is unavailable (e.g. Python 2); fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                # Guard against ZeroDivisionError when Content-Length is absent.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
        f.close();
    geturls_text.close();
    exec_time_end = time.time();
    # Fixed: elapsed time was previously computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when httplib2 is unavailable; delegates to the urllib implementation."""
        # Fixed: the delegated result was computed but never returned.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with httplib2 to outpath/outfile, or into an in-memory
    buffer when outfile is "-". buffersize is [download, copy] chunk sizes.
    Returns a result dict ('Type' "File" or "Content") or False on error."""
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is unavailable (e.g. Python 2); fall back to strptime.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        # Fixed: elapsed time was previously computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # Fixed: result dict previously carried a duplicate 'Method' key.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]=="2"):
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard against ZeroDivisionError on an empty file.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # Fixed: elapsed time was previously computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # Fixed: result dict previously carried a duplicate 'Method' key.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]>="3"):
        # Fixed: previously delegated to the urllib downloader; use the
        # httplib2 downloader consistently with the other branches.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard against ZeroDivisionError on an empty file.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        # Fixed: elapsed time was previously computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # Fixed: result dict previously carried a duplicate 'Method' key.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when httplib2 is unavailable; delegates to the urllib implementation."""
        # Fixed: arguments were previously passed positionally in the wrong
        # order (buffersize before outfile/outpath); pass by keyword so they
        # always bind to the right parameters. Also restore the return.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download httpurl using urllib's Request/urlopen machinery.

    Returns a dict {'Type': "Content", 'Content': body, 'Headers': ...,
    'Version': ..., 'Method': ..., 'HeadersSent': ..., 'URL': ..., 'Code': ...}
    or False on connection errors. HTTP error responses (4xx/5xx) are still
    read and returned via the HTTPError object."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fixed: previously updated httpuseragent (a string) instead of httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Send credentials embedded in the URL as HTTP Basic auth.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    install_opener(geturls_opener);
    time.sleep(sleep);
    httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_request = Request(httpurl, headers=httpheaders);
            geturls_text = urlopen(geturls_request);
        elif(httpmethod=="POST"):
            geturls_request = Request(httpurl, headers=httpheaders);
            geturls_text = urlopen(geturls_request, data=postdata);
        else:
            geturls_request = Request(httpurl, headers=httpheaders);
            geturls_text = urlopen(geturls_request);
    except HTTPError as geturls_text_error:
        # Keep the HTTPError: it is a response-like object whose body/headers
        # are still read below.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # NOTE(review): rebuilds the header mapping key-by-key on Python 2 —
        # presumably to coerce message objects into a plain dict; confirm.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.read()[:];
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with urllib's Request/urlopen into a uniquely named
    temporary file.

    The temp file name is salted with a SHA-1 of the URL, buffer size and start
    time. Returns a dict with 'Type': "File", the temp 'Filename', 'Filesize',
    download timing and response metadata; False on error."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # Fixed: previously updated httpuseragent (a string) instead of httpheaders.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # Fixed: previously updated httpuseragent instead of httpheaders.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # Send credentials embedded in the URL as HTTP Basic auth.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    install_opener(geturls_opener);
    time.sleep(sleep);
    httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_request = Request(httpurl, headers=httpheaders);
            geturls_text = urlopen(geturls_request);
        elif(httpmethod=="POST"):
            geturls_request = Request(httpurl, headers=httpheaders);
            geturls_text = urlopen(geturls_request, data=postdata);
        else:
            geturls_request = Request(httpurl, headers=httpheaders);
            geturls_text = urlopen(geturls_request);
    except HTTPError as geturls_text_error:
        # Keep the HTTPError: it is a response-like object whose body/headers
        # are still read below.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.getcode();
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # NOTE(review): rebuilds the header mapping key-by-key on Python 2 —
        # presumably to coerce message objects into a plain dict; confirm.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified timestamp onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
        except AttributeError:
            # parsedate_to_datetime is unavailable (e.g. Python 2); fall back to strptime.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                # Guard against ZeroDivisionError when Content-Length is absent.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
        f.close();
    geturls_text.close();
    exec_time_end = time.time();
    # Fixed: elapsed time was previously computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl (urllib Request backend) to outfile inside outpath,
    or return the content in memory when outfile is "-".

    Returns a result dict ('Type' "File" or "Content" plus sizes, timings and
    HTTP metadata), or False on failure.
    NOTE: buffersize is a shared mutable default; it is only read, never mutated.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    def _utime_from_lastmod(path, lastmod):
        # Best-effort: copy the HTTP Last-Modified header onto the file timestamps.
        if(lastmod is None):
            return;
        try:
            fmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
        except (AttributeError, TypeError, ValueError):
            try:
                fmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
            except (TypeError, ValueError):
                return;
        os.utime(path, (fmtime, fmtime));
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        _utime_from_lastmod(filepath, pretmpfilename.get('Headers').get('Last-Modified'));
        exec_time_end = time.time();
        # BUGFIX: durations were computed as start-end (always negative); use end-start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice; the later httpmethod value
        # won, so only that entry is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # The Py2 and Py3 branches were identical apart from StringIO vs BytesIO;
        # they are merged here.
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        f = StringIO() if sys.version[0]=="2" else BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                # Guard the percentage math: downloadsize can legitimately be 0.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        f.seek(0);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the requests library and return a result dict
    ('Content', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL', 'Code'),
    or False when the connection fails."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        # BUGFIX: the missing-key branch previously called update() on
        # httpuseragent (a string) instead of httpheaders.
        httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except requests.exceptions.ConnectionError:
        # BUGFIX: requests.exceptions defines ConnectionError, not ConnectError.
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.url;
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2: some header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        # BUGFIX: the buffer must be filled from the response body
        # (geturls_text.raw); the original read from the not-yet-created gzstrbuf.
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.raw.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.raw.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.raw.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.raw.read()[:];
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when the requests library is not installed:
        delegate to the urllib-based implementation and return its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep);
        return returnval;
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with the requests library into a uniquely named
    temporary file.

    Returns a dict with the temp file name, sizes, timing and HTTP metadata,
    or False when the connection fails.
    (Defaults for the header/cookie parameters added for consistency with the
    sibling download_from_url_with_requests; backward compatible.)
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        # BUGFIX: the missing-key branch previously updated httpuseragent
        # (a string) instead of httpheaders.
        httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except requests.exceptions.ConnectionError:
        # BUGFIX: requests.exceptions defines ConnectionError, not ConnectError.
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status_code;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.url;
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2: some header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    # BUGFIX: int(httpheaderout.get('Content-Length')) raised TypeError when the
    # header was absent; convert only after the None check.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    def _utime_from_lastmod(path, lastmod):
        # Best-effort: copy the HTTP Last-Modified header onto the file timestamps.
        if(lastmod is None):
            return;
        try:
            fmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
        except (AttributeError, TypeError, ValueError):
            try:
                fmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
            except (TypeError, ValueError):
                return;
        os.utime(path, (fmtime, fmtime));
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        _utime_from_lastmod(tmpfilename, httpheaderout.get('Last-Modified'));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard the percentage math: downloadsize is 0 when Content-Length is missing.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: durations were computed as start-end (always negative); use end-start.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when the requests library is not installed:
        delegate to the urllib-based implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep);
        return returnval;
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl (requests backend) to outfile inside outpath, or return
    the content in memory when outfile is "-".

    Returns a result dict ('Type' "File" or "Content" plus sizes, timings and
    HTTP metadata), or False on failure.
    NOTE: buffersize is a shared mutable default; it is only read, never mutated.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    def _utime_from_lastmod(path, lastmod):
        # Best-effort: copy the HTTP Last-Modified header onto the file timestamps.
        if(lastmod is None):
            return;
        try:
            fmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
        except (AttributeError, TypeError, ValueError):
            try:
                fmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
            except (TypeError, ValueError):
                return;
        os.utime(path, (fmtime, fmtime));
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        _utime_from_lastmod(filepath, pretmpfilename.get('Headers').get('Last-Modified'));
        exec_time_end = time.time();
        # BUGFIX: durations were computed as start-end (always negative); use end-start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: duplicate 'Method' key removed (the later httpmethod value won).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # The Py2 and Py3 branches were identical apart from StringIO vs BytesIO;
        # they are merged here.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        f = StringIO() if sys.version[0]=="2" else BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                # Guard the percentage math: downloadsize can legitimately be 0.
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        f.seek(0);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        # BUGFIX: 'HeadersSent' previously assigned the literal list ['HeadersSent']
        # (the pretmpfilename subscript base was missing).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests library is not installed:
        delegate to the urllib-based implementation and return its result."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep);
        return returnval;
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the httpx library and return a result dict
    ('Content', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL', 'Code'),
    or False when the connection fails."""
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        # BUGFIX: the missing-key branch previously called update() on
        # httpuseragent (a string) instead of httpheaders.
        httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    # One HTTP/1.1 client per call; closed before every exit to avoid leaking
    # connections (the original never closed it).
    httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
    try:
        if(httpmethod=="POST"):
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        httpx_pool.close();
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        httpx_pool.close();
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        httpx_pool.close();
        return False;
    httpcodeout = geturls_text.status_code;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2: some header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.read()[:];
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    httpx_pool.close();
    return returnval;
# NOTE(review): the guard line was elided in this extract; it is reconstructed
# here to match the if(not haverequests) fallbacks, so this definition does not
# unconditionally clobber the real httpx implementation — confirm against the
# full file.
if(not havehttpx):
    def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when the httpx library is not installed:
        delegate to the urllib-based implementation and return its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep);
        return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with the httpx library into a uniquely named
    temporary file.

    Returns a dict with the temp file name, sizes, timing and HTTP metadata,
    or False when the connection fails.
    (Defaults for the header/cookie parameters added for consistency with the
    sibling download_from_url_with_httpx; backward compatible.)
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash URL + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        # BUGFIX: the missing-key branch previously updated httpuseragent
        # (a string) instead of httpheaders.
        httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    # One HTTP/1.1 client per call; closed before every exit to avoid leaking
    # connections (the original never closed it).
    httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
    try:
        if(httpmethod=="POST"):
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        httpx_pool.close();
        return False;
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl);
        httpx_pool.close();
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        httpx_pool.close();
        return False;
    httpcodeout = geturls_text.status_code;
    httpversionout = geturls_text.http_version;
    httpmethodout = httpmethod;
    httpurlout = str(geturls_text.url);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        # Py2: some header objects are not plain dicts; rebuild key by key.
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    # BUGFIX: int(httpheaderout.get('Content-Length')) raised TypeError when the
    # header was absent; convert only after the None check.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    def _utime_from_lastmod(path, lastmod):
        # Best-effort: copy the HTTP Last-Modified header onto the file timestamps.
        if(lastmod is None):
            return;
        try:
            fmtime = time.mktime(email.utils.parsedate_to_datetime(lastmod).timetuple());
        except (AttributeError, TypeError, ValueError):
            try:
                fmtime = time.mktime(datetime.datetime.strptime(lastmod, "%a, %d %b %Y %H:%M:%S %Z").timetuple());
            except (TypeError, ValueError):
                return;
        os.utime(path, (fmtime, fmtime));
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        _utime_from_lastmod(tmpfilename, httpheaderout.get('Last-Modified'));
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        # BUGFIX: iter_content is a requests API; httpx responses expose iter_bytes.
        for databytes in geturls_text.iter_bytes(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard the percentage math: downloadsize is 0 when Content-Length is missing.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    httpx_pool.close();
    exec_time_end = time.time();
    # BUGFIX: durations were computed as start-end (always negative); use end-start.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fallback used when httpx is unavailable: delegate the file download
    to the urllib implementation with identical arguments.

    Returns the urllib backend's result dict (or False on failure).
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    # BUG FIX: the result was computed but never returned, so callers
    # always received None from this fallback.
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the httpx backend and either move it to
    outpath/outfile (returning a "File" result dict) or, when outfile is
    "-", read the temp file back into memory (returning a "Content" dict).

    Returns False when the target path is unusable or the download failed.
    NOTE(review): buffersize is a mutable default list of
    [download-chunk, copy-chunk] sizes — callers must not mutate it.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    returnval = False
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        # Refuse to write through a file posing as a directory, or into a
        # directory posing as the target file.
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp when present.
        try:
            lastmod = email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified'))
            os.utime(filepath, (time.mktime(lastmod.timetuple()), time.mktime(lastmod.timetuple())))
        except AttributeError:
            try:
                lastmod = datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z")
                os.utime(filepath, (time.mktime(lastmod.timetuple()), time.mktime(lastmod.timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_start - exec_time_end) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the original dict listed the 'Method' key twice
        # (pretmpfilename['Method'] then httpmethod); only the effective
        # value (httpmethod) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                     'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_start - exec_time_end),
                     'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code']}
    if outfile == "-":
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3 (file read in binary mode);
        # this merges the two previously duplicated version branches.
        f = StringIO() if sys.version[0] == "2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    # Guard avoids ZeroDivisionError on zero-length files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_start - exec_time_end) + " to copy file.")
        # BUG FIX: 'HeadersSent' previously held the bare literal
        # ['HeadersSent'] instead of pretmpfilename['HeadersSent'].
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                        'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_start - exec_time_end),
                     'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback used when httpx is unavailable: delegate the
    download-to-file operation to the urllib implementation.

    Returns the urllib backend's result dict (or False on failure).
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
    # BUG FIX: the delegated result was never returned to the caller.
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the httpx backend (HTTP/1.1 + HTTP/2 enabled) and
    return a result dict {'Type': "Content", 'Content': ..., 'Headers': ...,
    'Version': ..., 'Method': ..., 'HeadersSent': ..., 'URL': ..., 'Code': ...},
    or False when the connection fails.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # BUG FIX: the missing-key fallback previously called
    # httpuseragent.update(...) / on a str; both paths must write into
    # httpheaders.
    if httpuseragent is not None:
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        # Inline userinfo in the URL becomes an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
        if httpmethod == "POST":
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET is also the fallback for unrecognized methods, as before.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Rebuild the header mapping into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL " + httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if contentencoding == "gzip" or contentencoding == "deflate":
        strbuf = StringIO(geturls_text.read()) if sys.version[0] == "2" else BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif contentencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # BUG FIX: previously a "br" response without brotli installed left
        # returnval_content unbound (NameError); now raw bytes are returned.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fallback used when httpx is unavailable: delegate the fetch to the
    urllib implementation with identical arguments.

    Returns the urllib backend's result dict (or False on failure).
    """
    returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
    # BUG FIX: the delegated result was never returned to the caller.
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with the httpx backend into a uniquely named
    temporary file and return a "File" result dict (Filename, Filesize,
    Headers, Version, Method, HeadersSent, URL, Code, DownloadTime),
    or False when the connection fails.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix derived from URL + buffer size + start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # BUG FIX: the missing-key fallback previously wrote into httpuseragent
    # (a str) instead of httpheaders.
    if httpuseragent is not None:
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
        if httpmethod == "POST":
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Rebuild the header mapping into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUG FIX: int() was applied before the None check, so a missing
    # Content-Length header raised TypeError; convert only when present.
    downloadsize = httpheaderout.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Preserve the server's Last-Modified timestamp when present.
        try:
            lastmod = email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified'))
            os.utime(tmpfilename, (time.mktime(lastmod.timetuple()), time.mktime(lastmod.timetuple())))
        except AttributeError:
            try:
                lastmod = datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z")
                os.utime(tmpfilename, (time.mktime(lastmod.timetuple()), time.mktime(lastmod.timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                # Guard avoids ZeroDivisionError when Content-Length is absent.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took " + hms_string(exec_time_start - exec_time_end) + " to download file.")
    # Report the actual on-disk size, which may differ from Content-Length.
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fallback used when httpx is unavailable: delegate the file download
    to the urllib implementation with identical arguments.

    Returns the urllib backend's result dict (or False on failure).
    """
    returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
    # BUG FIX: the delegated result was never returned to the caller.
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the httpx2 backend and either move it to
    outpath/outfile (returning a "File" result dict) or, when outfile is
    "-", read the temp file back into memory (returning a "Content" dict).

    Returns False when the target path is unusable or the download failed.
    NOTE(review): buffersize is a mutable default list of
    [download-chunk, copy-chunk] sizes — callers must not mutate it.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    returnval = False
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        # Refuse to write through a file posing as a directory, or into a
        # directory posing as the target file.
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp when present.
        try:
            lastmod = email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified'))
            os.utime(filepath, (time.mktime(lastmod.timetuple()), time.mktime(lastmod.timetuple())))
        except AttributeError:
            try:
                lastmod = datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z")
                os.utime(filepath, (time.mktime(lastmod.timetuple()), time.mktime(lastmod.timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_start - exec_time_end) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        # BUG FIX: the original dict listed the 'Method' key twice; only the
        # effective value (httpmethod) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize,
                     'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                     'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_start - exec_time_end),
                     'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code']}
    if outfile == "-":
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3 (file read in binary mode);
        # this merges the two previously duplicated version branches.
        f = StringIO() if sys.version[0] == "2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    # Guard avoids ZeroDivisionError on zero-length files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_start - exec_time_end) + " to copy file.")
        # BUG FIX: 'HeadersSent' previously held the bare literal
        # ['HeadersSent'] instead of pretmpfilename['HeadersSent'].
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize,
                     'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"),
                                        'SI': get_readable_size(downloadsize, 2, "SI")},
                     'DownloadTime': pretmpfilename['DownloadTime'],
                     'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'],
                     'MoveFileTime': float(exec_time_start - exec_time_end),
                     'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end),
                     'Headers': pretmpfilename['Headers'],
                     'Version': pretmpfilename['Version'],
                     'Method': httpmethod,
                     'HeadersSent': pretmpfilename['HeadersSent'],
                     'URL': pretmpfilename['URL'],
                     'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback used when httpx is unavailable: delegate the
    download-to-file operation to the urllib implementation.

    Returns the urllib backend's result dict (or False on failure).
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
    # BUG FIX: the delegated result was never returned to the caller.
    return returnval
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the httpcore backend (HTTP/1.1 only) and return a
    result dict {'Type': "Content", ...}, or False on connection failure.

    NOTE(review): httpcookie is accepted for interface parity with the other
    backends, but the httpcore requests here do not send it — confirm.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    # BUG FIX: the missing-key fallback previously wrote into httpuseragent
    # (a str) instead of httpheaders.
    if httpuseragent is not None:
        httpheaders['User-Agent'] = httpuseragent
    if httpreferer is not None:
        httpheaders['Referer'] = httpreferer
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
        if httpmethod == "POST":
            # BUG FIX: the POST branch previously issued a "GET" request.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL " + httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"  # pool is created with http2=False
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if isinstance(httpheaderout, list):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if sys.version[0] == "2":
        # Rebuild the header mapping into a plain dict on Python 2.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while ic < imax:
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if isinstance(httpheadersentout, list):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL " + httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if contentencoding == "gzip" or contentencoding == "deflate":
        strbuf = StringIO(geturls_text.read()) if sys.version[0] == "2" else BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif contentencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # BUG FIX: previously a "br" response without brotli installed left
        # returnval_content unbound (NameError); now raw bytes are returned.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when httpcore is unavailable: delegate the fetch
        to the urllib implementation with identical arguments.

        Returns the urllib backend's result dict (or False on failure).
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        # BUG FIX: the delegated result was never returned to the caller.
        return returnval
2924 def download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
="GET", postdata
=None, buffersize
=524288, sleep
=-1):
2925 global geturls_download_sleep
, tmpfileprefix
, tmpfilesuffix
;
2926 exec_time_start
= time
.time();
2927 myhash
= hashlib
.new("sha1");
2928 if(sys
.version
[0]=="2"):
2929 myhash
.update(httpurl
);
2930 myhash
.update(str(buffersize
));
2931 myhash
.update(str(exec_time_start
));
2932 if(sys
.version
[0]>="3"):
2933 myhash
.update(httpurl
.encode('utf-8'));
2934 myhash
.update(str(buffersize
).encode('utf-8'));
2935 myhash
.update(str(exec_time_start
).encode('utf-8'));
2936 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
2938 sleep
= geturls_download_sleep
;
2939 urlparts
= urlparse
.urlparse(httpurl
);
2940 if(isinstance(httpheaders
, list)):
2941 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
2942 httpheaders
= fix_header_names(httpheaders
);
2943 if(httpuseragent
is not None):
2944 if('User-Agent' in httpheaders
):
2945 httpheaders
['User-Agent'] = httpuseragent
;
2947 httpuseragent
.update({'User-Agent': httpuseragent
});
2948 if(httpreferer
is not None):
2949 if('Referer' in httpheaders
):
2950 httpheaders
['Referer'] = httpreferer
;
2952 httpuseragent
.update({'Referer': httpreferer
});
2953 if(urlparts
.username
is not None or urlparts
.password
is not None):
2954 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
2955 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
2957 if(postdata
is not None and not isinstance(postdata
, dict)):
2958 postdata
= urlencode(postdata
);
2960 if(httpmethod
=="GET"):
2961 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
2962 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
2963 elif(httpmethod
=="POST"):
2964 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
2965 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
2967 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
2968 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
2969 except httpcore
.ConnectTimeout
:
2970 log
.info("Error With URL "+httpurl
);
2972 except httpcore
.ConnectError
:
2973 log
.info("Error With URL "+httpurl
);
2975 except socket
.timeout
:
2976 log
.info("Error With URL "+httpurl
);
2978 httpcodeout
= geturls_text
.status
;
2979 httpversionout
= "1.1";
2980 httpmethodout
= httpmethod
;
2981 httpurlout
= str(httpurl
);
2982 httpheaderout
= geturls_text
.headers
;
2983 httpheadersentout
= httpheaders
;
2984 if(isinstance(httpheaderout
, list)):
2985 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
2986 if(sys
.version
[0]=="2"):
2988 prehttpheaderout
= httpheaderout
;
2989 httpheaderkeys
= httpheaderout
.keys();
2990 imax
= len(httpheaderkeys
);
2994 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
2996 except AttributeError:
2998 httpheaderout
= fix_header_names(httpheaderout
);
2999 if(isinstance(httpheadersentout
, list)):
3000 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
3001 httpheadersentout
= fix_header_names(httpheadersentout
);
3002 downloadsize
= int(httpheaderout
.get('Content-Length'));
3003 if(downloadsize
is not None):
3004 downloadsize
= int(downloadsize
);
3005 if downloadsize
is None: downloadsize
= 0;
3008 log
.info("Downloading URL "+httpurl
);
3009 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
3010 tmpfilename
= f
.name
;
3012 os
.utime(tmpfilename
, (time
.mktime(email
.utils
.parsedate_to_datetime(httpheaderout
.get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(httpheaderout
.get('Last-Modified')).timetuple())));
3013 except AttributeError:
3015 os
.utime(tmpfilename
, (time
.mktime(datetime
.datetime
.strptime(httpheaderout
.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(httpheaderout
.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
3020 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
};
3021 for databytes
in geturls_text
.iter_content(chunk_size
=buffersize
):
3022 datasize
= len(databytes
);
3023 fulldatasize
= datasize
+ fulldatasize
;
3026 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
3027 downloaddiff
= fulldatasize
- prevdownsize
;
3028 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
3029 prevdownsize
= fulldatasize
;
3032 geturls_text
.close();
3033 exec_time_end
= time
.time();
3034 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
3035 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib backend with the same arguments and result contract."""
        # BUGFIX: the delegate's result was assigned but never returned,
        # so callers always received None.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the httpcore backend and deliver the result.

    If outfile names a file, the downloaded temp file is moved to
    outpath/outfile and a 'File' result dict is returned; if outfile is
    "-", the payload is returned in memory as a 'Content' result dict.
    Returns False when the download fails or the destination is unusable.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Download to a temp file first, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Carry the server's Last-Modified timestamp over to the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime unavailable (Python 2): parse manually.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # NOTE(review): start - end yields a negative duration; kept as-is to
        # match the convention used throughout this file.
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: 'Method' appeared twice in this dict (the later httpmethod
        # entry silently won); keep the effective value once.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: dropped the duplicate 'Method' key and replaced the bare
        # literal ['HeadersSent'] with the value from the download result.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] >= "3"):
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: dropped the duplicate 'Method' key (httpmethod kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib backend with the same arguments and result contract."""
        # BUGFIX: the delegate's result was assigned but never returned,
        # so callers always received None.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httpcore (HTTP/1.1 and HTTP/2 enabled) and return
    a 'Content' result dict, or False on a connection failure.

    Handles gzip/deflate transparently and brotli when havebrotli is set.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs in the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — was httpuseragent.update(...).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUGFIX: the POST branch issued the request with method "GET".
            # NOTE(review): the data= kwarg is kept from the original; verify
            # the installed httpcore expects data= rather than content=.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib backend with the same arguments and result contract."""
        # BUGFIX: the delegate's result was assigned but never returned,
        # so callers always received None.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl via httpcore (HTTP/1.1 + HTTP/2) into a uniquely
    named temp file and return a 'File' result dict, or False on failure.

    CONSISTENCY: parameter defaults added to match the sibling
    download_from_url_file_with_* backends (backward-compatible).
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) on a string.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: was httpuseragent.update(...) on a string.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        elif(httpmethod == "POST"):
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            # BUGFIX: the POST branch issued the request with method "GET".
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
        else:
            httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = str(httpurl)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUGFIX: int() was applied before the None check, so a missing
    # Content-Length header raised TypeError instead of falling back to 0.
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Stamp the temp file with the server's Last-Modified time.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime unavailable (Python 2): parse manually.
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        # NOTE(review): iter_content() is kept from the original; confirm the
        # installed httpcore response type actually provides it.
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.close()
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib backend with the same arguments and result contract."""
        # BUGFIX: the delegate's result was assigned but never returned,
        # so callers always received None.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the httpcore HTTP/2 backend and deliver the result.

    If outfile names a file, the downloaded temp file is moved to
    outpath/outfile and a 'File' result dict is returned; if outfile is
    "-", the payload is returned in memory as a 'Content' result dict.
    Returns False when the download fails or the destination is unusable.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Download to a temp file first, then move it into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Carry the server's Last-Modified timestamp over to the moved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime unavailable (Python 2): parse manually.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: 'Method' appeared twice in this dict (the later httpmethod
        # entry silently won); keep the effective value once.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: dropped the duplicate 'Method' key and replaced the bare
        # literal ['HeadersSent'] with the value from the download result.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] >= "3"):
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: dropped the duplicate 'Method' key (httpmethod kept).
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
# BUGFIX: this fallback redefinition appears unguarded in the source, which
# would unconditionally overwrite the real httpcore2 implementation defined
# above; guard it like the sibling fallbacks so it only applies when
# httpcore is missing.
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when httpcore is unavailable: delegate to the
        urllib backend with the same arguments and result contract."""
        # BUGFIX: the delegate's result was assigned but never returned.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with urllib3 and return a 'Content' result dict, or
    False on a connection failure.

    Handles gzip/deflate transparently and brotli when havebrotli is set.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUGFIX: was httpuseragent.update(...) — a str has no update();
            # the new header belongs in the httpheaders dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUGFIX: same as above — was httpuseragent.update(...).
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # FIX: dropped the redundant chained "geturls_text = geturls_text ="
        # double assignments present in the original.
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        # NOTE(review): urllib3 does not document a ConnectError class;
        # verify this name against the installed urllib3 version.
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0] == "2"):
        # Python 2: rebuild the header mapping as a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding") == "gzip" or httpheaderout.get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding") != "gzip" and httpheaderout.get("Content-Encoding") != "deflate" and httpheaderout.get("Content-Encoding") != "br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when urllib3 is unavailable: delegate to the
        urllib backend with the same arguments and result contract."""
        # BUGFIX: the delegate's result was assigned but never returned,
        # so callers always received None.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl to a uniquely-named temporary file using urllib3's
    ``PoolManager.request`` API.

    Returns a dict describing the downloaded file ('Type': "File", 'Filename',
    'Filesize', 'Headers', 'Code', ...) or False on connection failure.
    The caller is responsible for deleting the temporary file.

    Fixes vs. original: the User-Agent/Referer overrides were written into the
    ``httpuseragent`` string (AttributeError) instead of the header dict; the
    request result was assigned twice (``geturls_text = geturls_text = ...``);
    a missing Content-Length header crashed ``int(None)``; elapsed time was
    computed as start - end (always negative).
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: original mutated the httpuseragent string, not the dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same defect for the Referer override.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            # Any method other than POST is treated as a plain GET.
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects are not plain dicts; copy them key-by-key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUG FIX: Content-Length may be absent; int(None) raised TypeError.
    downloadsize = geturls_text.headers.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Stamp the temp file with the server's Last-Modified time.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except AttributeError:
            # parsedate_to_datetime missing (older Python) — parse manually.
            try:
                lastmod = time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lastmod, lastmod))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            # No/unparseable Last-Modified header — keep current mtime.
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: duration was start - end (negative) in the original.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the file download to
        the plain urllib implementation and return its result.

        BUG FIX: the original dropped the delegate's return value.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl and either move it to outpath/outfile (outfile != "-")
    or return its bytes in memory (outfile == "-"), via the urllib3 request API.

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns a result dict, or False on failure.
    NOTE(review): the list default and os.getcwd() default are evaluated once
    at import time — kept for interface compatibility; callers must not mutate.

    Fixes vs. original: the result dict listed 'Method' twice; the Python-3
    stdout branch lacked the ``if not pretmpfilename: return False`` guard the
    Python-2 branch had; the two nearly identical copy loops are unified; the
    move/copy duration was computed as start - end (negative).
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file — refuse.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory — refuse.
            return False
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified stamp over to the final file.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(filepath, (lastmod, lastmod))
        except AttributeError:
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(filepath, (lastmod, lastmod))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: duration was start - end (negative) in the original.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the original Python-3 branch skipped this failure check.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # Unified Python 2/3 path: StringIO holds str on 2, BytesIO bytes on 3.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the download-to-file
        operation to the plain urllib implementation and return its result.

        BUG FIX: the original dropped the delegate's return value.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl into memory using urllib3's ``PoolManager.urlopen`` API.

    Returns {'Type': "Content", 'Content': bytes, 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code'} or False on connection failure.
    gzip/deflate responses are decompressed with the gzip module; brotli is
    decompressed only when the brotli package is available.

    Fixes vs. original: the POST branch issued ``urlopen("GET", ...)`` so the
    request went out as a GET; User-Agent/Referer overrides were written into
    the ``httpuseragent`` string instead of the header dict.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: original mutated the httpuseragent string, not the dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same defect for the Referer override.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            # BUG FIX: the original passed "POST" data but the method "GET".
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects are not plain dicts; copy them key-by-key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the plain urllib
        implementation and return its result.

        BUG FIX: the original dropped the delegate's return value.
        """
        return download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl to a uniquely-named temporary file using urllib3's
    ``PoolManager.urlopen`` API.

    Returns a dict describing the downloaded file ('Type': "File", 'Filename',
    'Filesize', 'Headers', 'Code', ...) or False on connection failure.
    The caller is responsible for deleting the temporary file.

    Fixes vs. original: the POST branch issued ``urlopen("GET", ...)``;
    User-Agent/Referer overrides were written into the ``httpuseragent``
    string instead of the header dict; a missing Content-Length header
    crashed ``int(None)``; elapsed time was start - end (always negative).
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: original mutated the httpuseragent string, not the dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same defect for the Referer override.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            # BUG FIX: the original passed "POST" data but the method "GET".
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects are not plain dicts; copy them key-by-key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUG FIX: Content-Length may be absent; int(None) raised TypeError.
    downloadsize = geturls_text.headers.get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Stamp the temp file with the server's Last-Modified time.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except AttributeError:
            # parsedate_to_datetime missing (older Python) — parse manually.
            try:
                lastmod = time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(tmpfilename, (lastmod, lastmod))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            # No/unparseable Last-Modified header — keep current mtime.
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: duration was start - end (negative) in the original.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the file download to
        the plain urllib implementation and return its result.

        BUG FIX: the original dropped the delegate's return value.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl and either move it to outpath/outfile (outfile != "-")
    or return its bytes in memory (outfile == "-"), via the urllib3 urlopen API.

    buffersize is a two-element list: [download chunk size, copy chunk size].
    Returns a result dict, or False on failure.
    NOTE(review): the list default and os.getcwd() default are evaluated once
    at import time — kept for interface compatibility; callers must not mutate.

    Fixes vs. original: the result dict listed 'Method' twice; the Python-3
    stdout branch lacked the ``if not pretmpfilename: return False`` guard the
    Python-2 branch had; the two nearly identical copy loops are unified; the
    move/copy duration was computed as start - end (negative).
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file — refuse.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory — refuse.
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Carry the server's Last-Modified stamp over to the final file.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())
            os.utime(filepath, (lastmod, lastmod))
        except AttributeError:
            try:
                lastmod = time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())
                os.utime(filepath, (lastmod, lastmod))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUG FIX: duration was start - end (negative) in the original.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the original Python-3 branch skipped this failure check.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # Unified Python 2/3 path: StringIO holds str on 2, BytesIO bytes on 3.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the download-to-file
        operation to the plain urllib implementation and return its result.

        BUG FIX: the original dropped the delegate's return value.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl into memory using a mechanize.Browser.

    Returns {'Type': "Content", 'Content': bytes, 'Headers', 'Version',
    'Method', 'HeadersSent', 'URL', 'Code'} or False on connection failure.
    An HTTPError response is kept and processed like a success (its body and
    status code are returned), matching the other backends in this file.

    Fixes vs. original: User-Agent/Referer overrides were written into the
    ``httpuseragent`` string (AttributeError) instead of the header dict.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        # BUG FIX: original mutated the httpuseragent string, not the dict.
        httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        # BUG FIX: same defect for the Referer override.
        httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        # mechanize expects addheaders as a list of (name, value) pairs.
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # Keep the error response: it still carries headers, body and code.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects are not plain dicts; copy them key-by-key.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when mechanize is unavailable: delegate the download
        to the urllib implementation and return its result dict (or False).

        BUG FIX: the result was computed but never returned.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl via mechanize into a uniquely named temporary file.

    Returns a dict describing the download (temp file name, sizes, headers,
    status code, timing), or False when the request fails.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url/buffersize/start-time to build a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...), which fails on a str;
            # the intent is to add the header to the headers dict.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same mislabeled receiver as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # HTTP error responses still carry a body/headers; keep the response.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2 header objects need manual conversion to a plain dict.
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    # BUG FIX: int(httpheaderout.get('Content-Length')) raised TypeError
    # whenever the server omitted Content-Length; convert only if present.
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        # Preserve the server's Last-Modified timestamp when parseable.
        try:
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when mechanize is unavailable: delegate the
        file-based download to the urllib implementation.

        BUG FIX: the result was computed but never returned.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with mechanize to outpath/outfile, or into memory
    when outfile is "-". Returns a result dict or False on failure."""
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp when parseable.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except ValueError:
                pass
        except ValueError:
            pass
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the result dict listed 'Method' twice; keep the request method.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # Download to a temp file, then copy its contents into memory.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3; logic otherwise identical.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: the Python 3 branch set 'HeadersSent' to the literal list
        # ['HeadersSent']; use the value from the pre-download result instead.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when mechanize is unavailable: delegate to the
        urllib implementation.

        BUG FIX: arguments were previously passed positionally in the order
        (..., buffersize, outfile, outpath, sleep), which does not match the
        target signature; keyword arguments make the mapping explicit. The
        result is now also returned.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_file_from_ftp_file(url):
    """Fetch an ftp:// or ftps:// url and return its body as a BytesIO
    positioned at offset 0, or False on any failure / unsupported scheme."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        # BUG FIX: an unsupported scheme previously left `ftp` unbound and
        # crashed with NameError at connect time.
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUG FIX: error messages referenced the undefined name `httpurl`.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # BUG FIX: login previously passed the raw urlparts values, ignoring the
    # anonymous-credential defaults computed above.
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Return the body of an ftp:// / ftps:// url as a byte string by
    delegating to download_file_from_ftp_file()."""
    fetched = download_file_from_ftp_file(url)
    return fetched.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download an ftp:// / ftps:// URL and return a Content result dict,
    or False on failure. HTTP-style header arguments are normalized for
    interface parity with the other download_* functions but are not sent."""
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...), which fails on a str.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same mislabeled receiver as above.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download an ftp:// / ftps:// URL into a uniquely named temporary file.

    Returns a dict with the temp file name, sizes and timing, or False when
    the transfer fails.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash url/buffersize/start-time to build a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...), which fails on a str.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same mislabeled receiver as above.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    # Measure the in-memory download by seeking to the end of the buffer.
    geturls_text.seek(0, 2)
    downloadsize = geturls_text.tell()
    geturls_text.seek(0, 0)
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time was computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download an ftp:// / ftps:// URL to outpath/outfile, or into memory
    when outfile is "-". Returns a result dict or False on failure."""
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: elapsed time was computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: the result dict listed 'Method' twice; FTP transfers carry
        # no HTTP method, so propagate the pre-download value (None).
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # Download to a temp file, then copy its contents into memory.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3; logic otherwise identical.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to an ftp:// or ftps:// url.

    Returns the (rewound) file object on success, False otherwise.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        # BUG FIX: an unsupported scheme previously left `ftp` unbound and
        # crashed with NameError at connect time.
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUG FIX: error messages referenced the undefined name `httpurl`.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # BUG FIX: login previously passed the raw urlparts values, ignoring the
    # anonymous-credential defaults computed above.
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string ftpstring to an ftp:// / ftps:// url.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_ftp_file().
    BUG FIX: the result was computed but never returned, and the temporary
    buffer was never closed.
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch a sftp:// url via paramiko and return its body as a BytesIO
    positioned at offset 0, or False on any failure / unsupported scheme."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUG FIX: error messages referenced the undefined name `httpurl`.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        # Stub used when paramiko is not installed.
        return False

def download_file_from_sftp_string(url):
    """Return the body of a sftp:// url as a byte string."""
    sftpfile = download_file_from_sftp_file(url)
    return sftpfile.read()

if(not haveparamiko):
    # BUG FIX: this fallback previously re-defined download_file_from_ftp_string,
    # clobbering the working FTP helper whenever paramiko was missing; the
    # stub belongs to the SFTP variant.
    def download_file_from_sftp_string(url):
        return False
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download a sftp:// URL and return a Content result dict, or False on
    failure. HTTP-style header arguments are normalized for interface parity
    with the other download_* functions but are not sent."""
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # BUG FIX: was httpuseragent.update(...), which fails on a str.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # BUG FIX: same mislabeled receiver as above.
            httpheaders.update({'Referer': httpreferer})
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Stub used when paramiko is not installed: SFTP is unavailable,
        so always report failure (the visible def had no body)."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl over SFTP into a named temporary file on disk.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize',
    'DownloadTime', ...) describing the temp file, or False on failure.
    The caller is responsible for removing the temp file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: original called httpuseragent.update(...) — a string
            # has no .update(), so the header was never set and this path
            # raised AttributeError. Update the headers dict instead.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: same httpuseragent.update(...) typo for Referer.
            httpheaders.update({'Referer': httpreferer});
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_sftp_file(httpurl);
    if(not geturls_text):
        return False;
    # The SFTP helper returns an in-memory stream; size it by seeking.
    geturls_text.seek(0, 2);
    downloadsize = geturls_text.tell();
    geturls_text.seek(0, 0);
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        while True:
            databytes = geturls_text.read(buffersize);
            if(not databytes):
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: durations were computed as start - end (always negative);
    # report end - start instead.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;

if(not haveparamiko):
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback stub when paramiko is unavailable; always returns False."""
        return False;
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl over SFTP to a file, or to memory when outfile=="-".

    outfile/outpath name the destination file; buffersize is a two-element
    list: [download chunk size, copy chunk size]. Returns a result dict
    ('Type' is "File" or "Content") or False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to treat an existing file as a directory, or an existing
        # directory as the output file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: durations were start - end (negative); use end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice, so the second
        # entry (None) silently overwrote the real method; keep one entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        # Robustness: the original py3 branch lacked this failure guard.
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        exec_time_start = time.time();
        # py2 historically copied into a StringIO, py3 into a BytesIO;
        # the copy logic itself is identical and lives in one helper.
        membuf = StringIO() if(sys.version[0]=="2") else BytesIO();
        fdata = _sftp_copy_tmpfile_to_buffer(tmpfilename, buffersize[1], membuf, downloadsize);
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;

def _sftp_copy_tmpfile_to_buffer(tmpfilename, chunksize, membuf, downloadsize):
    """Chunk-copy tmpfilename into membuf with progress logging.

    Returns the buffer's full contents and closes membuf.
    """
    fulldatasize = 0;
    prevdownsize = 0;
    with open(tmpfilename, 'rb') as ft:
        while True:
            databytes = ft.read(chunksize);
            if(not databytes):
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            membuf.write(databytes);
    membuf.seek(0);
    fdata = membuf.getvalue();
    membuf.close();
    return fdata;

if(not haveparamiko):
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback stub when paramiko is unavailable; always returns False."""
        return False;
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path of an sftp:// URL.

    Connects with paramiko, writes via SFTP putfo, rewinds sftpfile and
    returns it on success; returns False on any failure.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        # NOTE(review): default port reconstructed as 22 (standard SFTP) —
        # confirm against the full file.
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    # Credentials normalized below are currently informational only; the
    # connect call passes urlparts.username/password directly.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: error paths referenced undefined 'httpurl'; the
        # parameter here is 'url', so logging itself raised NameError.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        """Fallback stub when paramiko is unavailable; always returns False."""
        return False;
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the bytes sftpstring to an sftp:// URL via paramiko.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_sftp_file.
    Returns that helper's result (rewound file object or False).
    """
    # BUGFIX: original passed undefined 'ftpfileo' to a nonexistent
    # 'upload_file_to_sftp_files' — use the buffer and helper actually
    # defined in this file.
    sftpfileo = BytesIO(sftpstring);
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;

if(not haveparamiko):
    # BUGFIX: stub signature originally took only (url), diverging from the
    # real function's (sftpstring, url) interface.
    def upload_file_to_sftp_string(sftpstring, url):
        """Fallback stub when paramiko is unavailable; always returns False."""
        return False;
def download_file_from_pysftp_file(url):
    """Download an sftp:// URL via pysftp into an in-memory BytesIO.

    Returns the rewound BytesIO on success, False on failure.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        # NOTE(review): default port reconstructed as 22 (standard SFTP) —
        # confirm against the full file.
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    # Credentials normalized below are currently informational only; the
    # Connection call passes urlparts.username/password directly.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the Connection object was discarded and an undefined
        # 'ssh' variable used afterwards (NameError); keep the handle and
        # perform the transfer through it.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: error paths referenced undefined 'httpurl'; param is 'url'.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not havepysftp):
    # NOTE(review): guard variable assumed to be 'havepysftp', mirroring the
    # haveparamiko pattern — confirm against the full file.
    def download_file_from_pysftp_file(url):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False;
def download_file_from_pysftp_string(url):
    """Download an sftp:// URL via pysftp and return its raw bytes.

    Returns False when the underlying download fails.
    """
    sftpfile = download_file_from_pysftp_file(url);
    # Robustness: the helper returns False on failure; propagate that
    # instead of crashing on False.read().
    if(not sftpfile):
        return False;
    return sftpfile.read();

if(not havepysftp):
    # BUGFIX: this stub was named download_file_from_ftp_string, which
    # clobbered the plain-FTP helper defined earlier in the file instead of
    # stubbing the pysftp one.
    def download_file_from_pysftp_string(url):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False;
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download httpurl via pysftp and return its content in a result dict.

    Returns {'Type': "Content", 'Content': <bytes>, ...} or False on failure.
    Header/cookie/method/postdata parameters are accepted for interface
    parity with the HTTP downloaders; SFTP itself does not use them.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        return False;
    log.info("Downloading URL "+httpurl);
    returnval_content = geturls_text.read()[:];
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
    geturls_text.close();
    return returnval;

if(not havepysftp):
    def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False;
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl via pysftp into a named temporary file on disk.

    Returns a result dict ('Type': "File", 'Filename', 'Filesize',
    'DownloadTime', ...) or False on failure. The caller removes the file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Unique temp-file suffix derived from URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_text = download_file_from_pysftp_file(httpurl);
    if(not geturls_text):
        return False;
    # The pysftp helper returns an in-memory stream; size it by seeking.
    geturls_text.seek(0, 2);
    downloadsize = geturls_text.tell();
    geturls_text.seek(0, 0);
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        while True:
            databytes = geturls_text.read(buffersize);
            if(not databytes):
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: durations were start - end (negative); use end - start.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;

if(not havepysftp):
    def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False;
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via pysftp to a file, or to memory when outfile=="-".

    outfile/outpath name the destination file; buffersize is a two-element
    list: [download chunk size, copy chunk size]. Returns a result dict
    ('Type' is "File" or "Content") or False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to treat an existing file as a directory, or an existing
        # directory as the output file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: durations were start - end (negative); use end - start.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUGFIX: the original dict listed 'Method' twice, so the second
        # entry (None) silently overwrote the real method; keep one entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_pysftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        # Robustness: the original py3 branch lacked this failure guard.
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        exec_time_start = time.time();
        # py2 historically copied into a StringIO, py3 into a BytesIO;
        # the copy logic itself is identical and lives in one helper.
        membuf = StringIO() if(sys.version[0]=="2") else BytesIO();
        fdata = _pysftp_copy_tmpfile_to_buffer(tmpfilename, buffersize[1], membuf, downloadsize);
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;

def _pysftp_copy_tmpfile_to_buffer(tmpfilename, chunksize, membuf, downloadsize):
    """Chunk-copy tmpfilename into membuf with progress logging.

    Returns the buffer's full contents and closes membuf.
    """
    fulldatasize = 0;
    prevdownsize = 0;
    with open(tmpfilename, 'rb') as ft:
        while True:
            databytes = ft.read(chunksize);
            if(not databytes):
                break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            membuf.write(databytes);
    membuf.seek(0);
    fdata = membuf.getvalue();
    membuf.close();
    return fdata;

if(not havepysftp):
    def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False;
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path of an sftp:// URL
    using pysftp.

    Rewinds and returns sftpfile on success; returns False on failure.
    """
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        # NOTE(review): default port reconstructed as 22 (standard SFTP) —
        # confirm against the full file.
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    # Credentials normalized below are currently informational only; the
    # Connection call passes urlparts.username/password directly.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the Connection object was discarded and an undefined
        # 'ssh' variable used afterwards (NameError); keep the handle and
        # perform the transfer through it.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        # BUGFIX: error paths referenced undefined 'httpurl'; param is 'url'.
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        """Fallback stub when pysftp is unavailable; always returns False."""
        return False;
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the bytes sftpstring to an sftp:// URL via pysftp.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_pysftp_file.
    Returns that helper's result (rewound file object or False).
    """
    # BUGFIX: original passed undefined 'ftpfileo' to a nonexistent
    # 'upload_file_to_pysftp_files' — use the buffer and helper actually
    # defined in this file.
    sftpfileo = BytesIO(sftpstring);
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;
5296 def upload_file_to_pysftp_string(url
):