4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/17/2023 Ver. 1.2.4 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
49 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
77 if(sys
.version
[0]=="2"):
79 from cStringIO
import StringIO
;
81 from StringIO
import StringIO
;
82 # From http://python-future.org/compatible_idioms.html
83 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
84 from urllib
import urlencode
;
85 from urllib
import urlopen
as urlopenalt
;
86 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
87 import urlparse
, cookielib
;
88 from httplib
import HTTPConnection
, HTTPSConnection
;
89 if(sys
.version
[0]>="3"):
90 from io
import StringIO
, BytesIO
;
91 # From http://python-future.org/compatible_idioms.html
92 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
93 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
94 from urllib
.error
import HTTPError
, URLError
;
95 import urllib
.parse
as urlparse
;
96 import http
.cookiejar
as cookielib
;
97 from http
.client
import HTTPConnection
, HTTPSConnection
;
# ---------------------------------------------------------------------------
# Program identity and version metadata.
# ---------------------------------------------------------------------------
__program_name__ = "PyWWW-Get"
__program_alt_name__ = "PyWWWGet"
__program_small_name__ = "wwwget"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
__version_info__ = (1, 2, 4, "RC 1", 1)
__version_date_info__ = (2023, 9, 17, "RC 1", 1)
# Date stamp rendered as "YYYY.MM.DD" with zero-padded month/day.
__version_date__ = "{0}.{1}.{2}".format(str(__version_date_info__[0]), str(__version_date_info__[1]).zfill(2), str(__version_date_info__[2]).zfill(2))
__revision__ = __version_info__[3]
__revision_id__ = "$Id: b5160e99346c45eb8cfbba0634fd5c48c9c6646f $"
# NOTE(review): the guard checks __version_info__[4] but the appended suffix
# comes from __version_date_info__[4]; both happen to be 1 — confirm intended.
if __version_info__[4] is not None:
    __version_date_plusrc__ = __version_date__ + "-" + str(__version_date_info__[4])
else:
    __version_date_plusrc__ = __version_date__
# Human-readable version string, e.g. "1.2.4 RC 1".
if __version_info__[3] is not None:
    __version__ = "{0}.{1}.{2} {3}".format(str(__version_info__[0]), str(__version_info__[1]), str(__version_info__[2]), str(__version_info__[3]))
else:
    __version__ = "{0}.{1}.{2}".format(str(__version_info__[0]), str(__version_info__[1]), str(__version_info__[2]))
118 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
120 pytempdir
= tempfile
.gettempdir();
122 PyBitness
= platform
.architecture();
123 if(PyBitness
=="32bit" or PyBitness
=="32"):
125 elif(PyBitness
=="64bit" or PyBitness
=="64"):
130 compression_supported
= "gzip, deflate";
132 compression_supported
= "gzip, deflate, br";
134 compression_supported
= "gzip, deflate";
# Shared cookie jar used by the download helpers below.
geturls_cj = cookielib.CookieJar();
# Windows platform tokens for User-Agent strings, each paired with the
# matching User-Agent Client Hints headers.
# BUGFIX: these addon dicts previously listed 'SEC-CH-UA-PLATFORM' twice, so
# the second (version) value silently overwrote "Windows" and no platform
# version was ever sent; the version now lives in its own
# 'SEC-CH-UA-PLATFORM-VERSION' key, per the Client Hints specification.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): "5.1.0" kept from the original, but the UA string says NT 5.2
# — presumably this should be "5.2.0"; confirm before changing.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Full browser User-Agent strings built around the Windows 7 platform token.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# User-Agent strings identifying this program itself, plus Googlebot lookalikes.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Fall back to the generic name "Python" when the interpreter does not report
# an implementation name.
if platform.python_implementation() != "":
    py_implementation = platform.python_implementation();
else:
    py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used when callers do not pick a specific one.
geturls_ua = geturls_ua_firefox_windows7;
# ---------------------------------------------------------------------------
# Ready-made HTTP request-header dictionaries, one per impersonated browser.
# The Chromium-family dicts are extended with the Windows 7 Client Hint
# headers via .update(windows7_ua_addon).
# ---------------------------------------------------------------------------
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# BUGFIX: the two pywwwget dicts below previously listed 'SEC-CH-UA-PLATFORM'
# twice, so str(__version__) silently overwrote the implementation name; the
# version now goes in 'SEC-CH-UA-PLATFORM-VERSION'.
# NOTE(review): PyBitness is expected to hold "32"/"64" by this point (it is
# normalized from platform.architecture() earlier in the file) — confirm.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Defaults used by download helpers when no headers/sleep are supplied.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
194 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
195 if(outtype
=="print" and dbgenable
):
198 elif(outtype
=="log" and dbgenable
):
199 logging
.info(dbgtxt
);
201 elif(outtype
=="warning" and dbgenable
):
202 logging
.warning(dbgtxt
);
204 elif(outtype
=="error" and dbgenable
):
205 logging
.error(dbgtxt
);
207 elif(outtype
=="critical" and dbgenable
):
208 logging
.critical(dbgtxt
);
210 elif(outtype
=="exception" and dbgenable
):
211 logging
.exception(dbgtxt
);
213 elif(outtype
=="logalt" and dbgenable
):
214 logging
.log(dgblevel
, dbgtxt
);
216 elif(outtype
=="debug" and dbgenable
):
217 logging
.debug(dbgtxt
);
225 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
226 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
231 def add_url_param(url
, **params
):
233 parts
= list(urlparse
.urlsplit(url
));
234 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
236 parts
[n
]=urlencode(d
);
237 return urlparse
.urlunsplit(parts
);
# Make executables that sit next to this script or in the current working
# directory discoverable through PATH lookups.
os.environ["PATH"] = os.pathsep.join([
    os.environ["PATH"],
    os.path.dirname(os.path.realpath(__file__)),
    os.getcwd(),
]);
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns the first "directory/execfile" whose path exists, or None
    (implicitly) when nothing matches.
    """
    # BUGFIX: split on os.pathsep, not a literal ":" — PATH is assembled
    # elsewhere in this file with os.pathsep, and on Windows the separator is
    # ";" while ":" appears inside drive letters like "C:\...".
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = path + "/" + execfile
        if os.path.exists(candidate):
            return candidate
245 def listize(varlist
):
253 newlistreg
.update({ilx
: varlist
[il
]});
254 newlistrev
.update({varlist
[il
]: ilx
});
257 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
260 def twolistize(varlist
):
270 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
271 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
272 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
273 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
276 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
277 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
278 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
281 def arglistize(proexec
, *varlist
):
285 newarglist
= [proexec
];
287 if varlist
[il
][0] is not None:
288 newarglist
.append(varlist
[il
][0]);
289 if varlist
[il
][1] is not None:
290 newarglist
.append(varlist
[il
][1]);
294 # hms_string by ArcGIS Python Recipes
295 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Return a duration in seconds formatted as "H:MM:SS.ss".

    (hms_string by ArcGIS Python Recipes,
    https://arcpy.wordpress.com/2012/04/20/146/)
    """
    whole_hours = int(sec_elapsed / (60 * 60))
    whole_minutes = int((sec_elapsed % (60 * 60)) / 60)
    leftover_seconds = sec_elapsed % 60.0
    # Minutes zero-padded to 2 chars; seconds zero-padded to 5 chars incl.
    # two decimals (e.g. "05.00").
    return "{0}:{1:>02}:{2:>05.2f}".format(whole_hours, whole_minutes, leftover_seconds)
302 # get_readable_size by Lipis
303 # http://stackoverflow.com/posts/14998888/revisions
304 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
306 if(unit
!="IEC" and unit
!="SI"):
309 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
310 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
313 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
314 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
319 if abs(bytes
) < unitsize
:
320 strformat
= "%3."+str(precision
)+"f%s";
321 pre_return_val
= (strformat
% (bytes
, unit
));
322 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
323 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
324 alt_return_val
= pre_return_val
.split();
325 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
328 strformat
= "%."+str(precision
)+"f%s";
329 pre_return_val
= (strformat
% (bytes
, "YiB"));
330 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
331 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
332 alt_return_val
= pre_return_val
.split();
333 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
336 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
338 usehashtypes
= usehashtypes
.lower();
339 getfilesize
= os
.path
.getsize(infile
);
340 return_val
= get_readable_size(getfilesize
, precision
, unit
);
342 hashtypelist
= usehashtypes
.split(",");
343 openfile
= open(infile
, "rb");
344 filecontents
= openfile
.read();
347 listnumend
= len(hashtypelist
);
348 while(listnumcount
< listnumend
):
349 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
350 hashtypelistup
= hashtypelistlow
.upper();
351 filehash
= hashlib
.new(hashtypelistup
);
352 filehash
.update(filecontents
);
353 filegethash
= filehash
.hexdigest();
354 return_val
.update({hashtypelistup
: filegethash
});
358 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
360 usehashtypes
= usehashtypes
.lower();
361 getfilesize
= len(instring
);
362 return_val
= get_readable_size(getfilesize
, precision
, unit
);
364 hashtypelist
= usehashtypes
.split(",");
366 listnumend
= len(hashtypelist
);
367 while(listnumcount
< listnumend
):
368 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
369 hashtypelistup
= hashtypelistlow
.upper();
370 filehash
= hashlib
.new(hashtypelistup
);
371 if(sys
.version
[0]=="2"):
372 filehash
.update(instring
);
373 if(sys
.version
[0]>="3"):
374 filehash
.update(instring
.encode('utf-8'));
375 filegethash
= filehash
.hexdigest();
376 return_val
.update({hashtypelistup
: filegethash
});
380 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
381 if isinstance(headers
, dict):
383 if(sys
.version
[0]=="2"):
384 for headkey
, headvalue
in headers
.iteritems():
385 returnval
.append((headkey
, headvalue
));
386 if(sys
.version
[0]>="3"):
387 for headkey
, headvalue
in headers
.items():
388 returnval
.append((headkey
, headvalue
));
389 elif isinstance(headers
, list):
395 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
396 if isinstance(headers
, dict):
398 if(sys
.version
[0]=="2"):
399 for headkey
, headvalue
in headers
.iteritems():
400 returnval
.append(headkey
+": "+headvalue
);
401 if(sys
.version
[0]>="3"):
402 for headkey
, headvalue
in headers
.items():
403 returnval
.append(headkey
+": "+headvalue
);
404 elif isinstance(headers
, list):
410 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
411 if isinstance(headers
, list):
416 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
418 elif isinstance(headers
, dict):
424 def get_httplib_support(checkvalue
=None):
425 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
;
427 returnval
.append("ftp");
428 returnval
.append("httplib");
430 returnval
.append("httplib2");
431 returnval
.append("urllib");
433 returnval
.append("urllib3");
434 returnval
.append("request3");
435 returnval
.append("request");
437 returnval
.append("requests");
439 returnval
.append("httpx");
440 returnval
.append("httpx2");
442 returnval
.append("mechanize");
444 returnval
.append("sftp");
445 if(not checkvalue
is None):
446 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
447 checkvalue
= "urllib";
448 if(checkvalue
=="httplib1"):
449 checkvalue
= "httplib";
450 if(checkvalue
in returnval
):
456 def check_httplib_support(checkvalue
="urllib"):
457 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
458 checkvalue
= "urllib";
459 if(checkvalue
=="httplib1"):
460 checkvalue
= "httplib";
461 returnval
= get_httplib_support(checkvalue
);
464 def get_httplib_support_list():
465 returnval
= get_httplib_support(None);
468 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
469 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
;
471 sleep
= geturls_download_sleep
;
472 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
473 httplibuse
= "urllib";
474 if(httplibuse
=="httplib1"):
475 httplibuse
= "httplib";
476 if(not haverequests
and httplibuse
=="requests"):
477 httplibuse
= "urllib";
478 if(not havehttpx
and httplibuse
=="httpx"):
479 httplibuse
= "urllib";
480 if(not havehttpx
and httplibuse
=="httpx2"):
481 httplibuse
= "urllib";
482 if(not havehttpcore
and httplibuse
=="httpcore"):
483 httplibuse
= "urllib";
484 if(not havehttpcore
and httplibuse
=="httpcore2"):
485 httplibuse
= "urllib";
486 if(not havemechanize
and httplibuse
=="mechanize"):
487 httplibuse
= "urllib";
488 if(not havehttplib2
and httplibuse
=="httplib2"):
489 httplibuse
= "httplib";
490 if(not haveparamiko
and httplibuse
=="sftp"):
492 if(httplibuse
=="urllib"):
493 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
494 elif(httplibuse
=="request"):
495 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
496 elif(httplibuse
=="request3"):
497 returnval
= download_from_url_with_request3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
498 elif(httplibuse
=="httplib"):
499 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
500 elif(httplibuse
=="httplib2"):
501 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
502 elif(httplibuse
=="urllib3"):
503 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
504 elif(httplibuse
=="requests"):
505 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
506 elif(httplibuse
=="httpx"):
507 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
508 elif(httplibuse
=="httpx2"):
509 returnval
= download_from_url_with_httpx2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
510 elif(httplibuse
=="httpcore"):
511 returnval
= download_from_url_with_httpcore(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
512 elif(httplibuse
=="httpcore2"):
513 returnval
= download_from_url_with_httpcore2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
514 elif(httplibuse
=="mechanize"):
515 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
516 elif(httplibuse
=="ftp"):
517 returnval
= download_from_url_with_ftp(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
518 elif(httplibuse
=="sftp"):
519 returnval
= download_from_url_with_sftp(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
524 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1):
525 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
;
527 sleep
= geturls_download_sleep
;
528 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
529 httplibuse
= "urllib";
530 if(httplibuse
=="httplib1"):
531 httplibuse
= "httplib";
532 if(not haverequests
and httplibuse
=="requests"):
533 httplibuse
= "urllib";
534 if(not havehttpx
and httplibuse
=="httpx"):
535 httplibuse
= "urllib";
536 if(not havehttpx
and httplibuse
=="httpx2"):
537 httplibuse
= "urllib";
538 if(not havehttpcore
and httplibuse
=="httpcore"):
539 httplibuse
= "urllib";
540 if(not havehttpcore
and httplibuse
=="httpcore2"):
541 httplibuse
= "urllib";
542 if(not havemechanize
and httplibuse
=="mechanize"):
543 httplibuse
= "urllib";
544 if(not havehttplib2
and httplibuse
=="httplib2"):
545 httplibuse
= "httplib";
546 if(not haveparamiko
and httplibuse
=="sftp"):
548 if(httplibuse
=="urllib"):
549 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
550 elif(httplibuse
=="request"):
551 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
552 elif(httplibuse
=="request3"):
553 returnval
= download_from_url_file_with_request3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
554 elif(httplibuse
=="httplib"):
555 returnval
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
556 elif(httplibuse
=="httplib2"):
557 returnval
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
558 elif(httplibuse
=="urllib3"):
559 returnval
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
560 elif(httplibuse
=="requests"):
561 returnval
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
562 elif(httplibuse
=="httpx"):
563 returnval
= download_from_url_file_with_httpx(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
564 elif(httplibuse
=="httpx2"):
565 returnval
= download_from_url_file_with_httpx2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
566 elif(httplibuse
=="httpcore"):
567 returnval
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
568 elif(httplibuse
=="httpcore2"):
569 returnval
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
570 elif(httplibuse
=="mechanize"):
571 returnval
= download_from_url_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
572 elif(httplibuse
=="ftp"):
573 returnval
= download_from_url_file_with_ftp(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
574 elif(httplibuse
=="sftp"):
575 returnval
= download_from_url_file_with_sftp(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
580 def download_from_url_to_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
581 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcorei
, haveparamiko
;
583 sleep
= geturls_download_sleep
;
584 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
585 httplibuse
= "urllib";
586 if(httplibuse
=="httplib1"):
587 httplibuse
= "httplib";
588 if(not haverequests
and httplibuse
=="requests"):
589 httplibuse
= "urllib";
590 if(not havehttpx
and httplibuse
=="httpx"):
591 httplibuse
= "urllib";
592 if(not havehttpx
and httplibuse
=="httpx2"):
593 httplibuse
= "urllib";
594 if(not havehttpcore
and httplibuse
=="httpcore"):
595 httplibuse
= "urllib";
596 if(not havehttpcore
and httplibuse
=="httpcore2"):
597 httplibuse
= "urllib";
598 if(not havemechanize
and httplibuse
=="mechanize"):
599 httplibuse
= "urllib";
600 if(not havehttplib2
and httplibuse
=="httplib2"):
601 httplibuse
= "httplib";
602 if(not haveparamiko
and httplibuse
=="sftp"):
604 if(httplibuse
=="urllib"):
605 returnval
= download_from_url_to_file_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
606 elif(httplibuse
=="request"):
607 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
608 elif(httplibuse
=="request3"):
609 returnval
= download_from_url_to_file_with_request3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
610 elif(httplibuse
=="httplib"):
611 returnval
= download_from_url_to_file_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
612 elif(httplibuse
=="httplib2"):
613 returnval
= download_from_url_to_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
614 elif(httplibuse
=="urllib3"):
615 returnval
= download_from_url_to_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
616 elif(httplibuse
=="requests"):
617 returnval
= download_from_url_to_file_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
618 elif(httplibuse
=="httpx"):
619 returnval
= download_from_url_file_with_httpx(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
620 elif(httplibuse
=="httpx2"):
621 returnval
= download_from_url_file_with_httpx2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
622 elif(httplibuse
=="httpcore"):
623 returnval
= download_from_url_file_with_httpcore(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
624 elif(httplibuse
=="httpcore2"):
625 returnval
= download_from_url_file_with_httpcore2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
626 elif(httplibuse
=="mechanize"):
627 returnval
= download_from_url_to_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
628 elif(httplibuse
=="ftp"):
629 returnval
= download_from_url_to_file_with_ftp(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
630 elif(httplibuse
=="sftp"):
631 returnval
= download_from_url_to_file_with_sftp(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with urllib and return the response as a dict.

    Returns {'Type': "Content", 'Content': <bytes>, 'Headers': <dict>,
    'Version': "1.1", 'Method': httpmethod, 'HeadersSent': <dict>,
    'URL': <final url>, 'Code': <status>} or False on connection failure.

    Fix: the original set returnval_content only when Content-Encoding was
    not "br", then called brotli.decompress(returnval_content) for "br" —
    a guaranteed NameError.  The decode branches are now exclusive, and a
    "br" body without the brotli module falls back to the raw bytes.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Credentials embedded in the URL become an HTTP basic-auth header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # An HTTP error response still carries headers and a body; keep it.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    try:
        # Flatten the message object into a plain {name: value} dict.
        prehttpheaderout = geturls_text.info()
        httpheaderkeys = geturls_text.info().keys()
        imax = len(httpheaderkeys)
        ic = 0
        httpheaderout = {}
        while(ic < imax):
            httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
            ic += 1
    except AttributeError:
        httpheaderout = geturls_text.info()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if(contentencoding == "gzip" or contentencoding == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif(contentencoding == "br" and havebrotli):
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # Identity encoding, or "br" without the brotli module available:
        # return the body bytes as received.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with urllib into a named temporary file.

    Returns a dict with 'Type': "File", the temp 'Filename', sizes, headers
    and timing info, or False when the request fails.

    Fixes: elapsed time was computed as start - end (always negative); a
    second, unreachable duplicate ``except socket.timeout`` handler is
    removed; the Last-Modified utime call now also tolerates a missing or
    malformed header (the original only caught AttributeError, so an absent
    'Last-Modified' key raised KeyError).
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the url, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # Keep the error response: it still has headers and a body.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    try:
        # Flatten the message object into a plain {name: value} dict.
        prehttpheaderout = geturls_text.info()
        httpheaderkeys = geturls_text.info().keys()
        imax = len(httpheaderkeys)
        ic = 0
        httpheaderout = {}
        while(ic < imax):
            httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
            ic += 1
    except AttributeError:
        httpheaderout = geturls_text.info()
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())))
        except (AttributeError, KeyError, TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.close()
    geturls_text.close()
    exec_time_end = time.time()
    # Elapsed time is end - start (the original had the operands reversed).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl to outpath/outfile via urllib; outfile=="-" returns the content in-memory.

    Returns a result dict ('Type': "File" or "Content") or False on failure.

    Fixes: MoveFileTime was computed as start - end (negative); the returnval
    dicts carried a duplicate 'Method' key; the py3 stdout branch lacked the
    "download failed" guard that the py2 branch has.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified time on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())))
        except (AttributeError, KeyError, TypeError, ValueError):
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] >= "3"):
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httplib/http.client and return the response as a dict.

    Returns the same 'Type': "Content" dict shape as the urllib variant, or
    False on failure / unsupported scheme.

    Fixes: the POST branch issued ``request("GET", ..., body=postdata)`` —
    the body was sent under the wrong verb; it now sends "POST".  The "br"
    Content-Encoding branch decompressed a never-assigned variable
    (NameError); decode branches are now exclusive with a raw-bytes fallback.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Credentials embedded in the URL become an HTTP basic-auth header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): the opener is never used on the httplib path, so the
    # cookie jar is not applied here; kept for parity with the other paths.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if(contentencoding == "gzip" or contentencoding == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif(contentencoding == "br" and havebrotli):
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # Identity encoding, or "br" without brotli: raw bytes as received.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httplib/http.client into a named temporary file.

    Returns a 'Type': "File" dict (temp filename, sizes, headers, timing) or
    False on failure.

    Fixes: POST issued ``request("GET", ...)`` with the body (wrong verb);
    the pre-download returnval dict carried duplicate 'Type' keys and
    referenced the undefined ``returnval_content`` (copy-paste from the
    content variant) — it is now the proper File dict; elapsed time was
    start - end (negative); the Last-Modified utime call now also tolerates
    a missing header.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the url, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): the opener (and its cookie jar) is unused on this path.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnection(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnection(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())))
        except (AttributeError, KeyError, TypeError, ValueError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.close()
    geturls_text.close()
    exec_time_end = time.time()
    # Elapsed time is end - start (the original had the operands reversed).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl to outpath/outfile via httplib; outfile=="-" returns the content in-memory.

    Returns a result dict ('Type': "File" or "Content") or False on failure.

    Fixes: the py3 stdout branch mistakenly called
    download_from_url_file_with_urllib instead of the httplib variant;
    MoveFileTime was start - end (negative); the returnval dicts carried a
    duplicate 'Method' key; the py3 branch lacked the "download failed"
    guard present in the py2 branch.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified time on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())))
        except (AttributeError, KeyError, TypeError, ValueError):
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] >= "3"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl using httplib2's timeout-aware connection classes.

    Returns the same 'Type': "Content" dict shape as the urllib variant, or
    False on failure / unsupported scheme.

    Fixes: the POST branch issued ``request("GET", ..., body=postdata)``
    (wrong verb — now "POST"); the "br" Content-Encoding branch decompressed
    a never-assigned variable (NameError) — decode branches are now
    exclusive with a raw-bytes fallback.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Credentials embedded in the URL become an HTTP basic-auth header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    # NOTE(review): the opener is never used on the httplib2 path, so the
    # cookie jar is not applied here; kept for parity with the other paths.
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0] == "http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif(urlparts[0] == "https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod == "POST"):
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    contentencoding = httpheaderout.get("Content-Encoding")
    if(contentencoding == "gzip" or contentencoding == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    elif(contentencoding == "br" and havebrotli):
        returnval_content = brotli.decompress(geturls_text.read()[:])
    else:
        # Identity encoding, or "br" without brotli: raw bytes as received.
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
1203 if(not havehttplib2
):
1204 def download_from_url_with_httplib2(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, sleep
=-1):
1205 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httplib2's timeout-aware connections into a
    uniquely named temporary file.

    Parameters mirror the other download_from_url_file_with_* helpers:
    httpheaders may be a header list or dict, httpcookie is a cookie jar,
    httpmethod is "GET" or "POST", buffersize is the read chunk size and a
    negative sleep means "use the module default".

    Returns a dict with Type/Filename/Filesize/Headers/Version/Method/
    HeadersSent/URL/Code plus DownloadTime info, or False on socket errors.

    BUG FIXES vs. original: POST requests were issued with the "GET" verb;
    elapsed time was computed as start-end (negative); a missing
    Last-Modified header raised KeyError in the os.utime step.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Inline user:password in the URL becomes an HTTP Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders)
        elif(httpmethod=="POST"):
            # BUG FIX: the POST branch previously sent the "GET" verb.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.gaierror:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    httpcodeout = geturls_text.status
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = httpurl
    httpheaderout = geturls_text.getheaders()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            # AttributeError: parsedate_to_datetime is absent on Python 2;
            # KeyError: the header may be missing entirely (BUG FIX).
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())))
        except (AttributeError, KeyError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was negative before).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when httplib2 is unavailable: delegate the
        download-to-temp-file operation to the urllib implementation.

        BUG FIX: the delegate's result dict was assigned but never returned.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with httplib2 either to outpath/outfile, or — when
    outfile=="-" — into memory, returning the content in the result dict.

    buffersize is [download_chunk, copy_chunk]. Returns a result dict
    ('Type' is "File" or "Content") or False on failure.

    BUG FIXES vs. original: the result dict listed 'Method' twice (the
    first value was silently discarded); the Python-3 in-memory branch
    delegated to download_from_url_file_with_urllib instead of the
    httplib2 variant; the in-memory buffer was read from an uninitialized
    name; elapsed times were computed as start-end (negative).
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Propagate the server's Last-Modified time to the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())))
        except (AttributeError, KeyError):
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: 'Method' appeared twice; keep the single httpmethod entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # BUG FIX: the Python-3 path previously delegated to the urllib
        # variant; both versions now use the httplib2 downloader.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # In-memory sink: StringIO carries Python-2 byte strings, BytesIO Python-3 bytes.
        if(sys.version[0]=="2"):
            membuf = StringIO()
        else:
            membuf = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                membuf.write(databytes)
        membuf.seek(0)
        fdata = membuf.getvalue()
        membuf.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when httplib2 is unavailable: delegate to the
        urllib to-file implementation.

        BUG FIXES: arguments were passed positionally in the wrong order
        (buffersize where outfile belongs — the *_to_file_* signatures take
        postdata, outfile, outpath, buffersize, sleep), and the result was
        never returned.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download httpurl with urllib's Request/urlopen and return the body
    plus response metadata as a dict, or False on connection failure.

    Handles gzip/deflate transparently and Brotli when the brotli module
    is available.

    BUG FIXES vs. original: the assembled result dict was never returned;
    a 'br'-encoded response referenced the content variable before it was
    assigned.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Inline URL credentials become an HTTP Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    install_opener(geturls_opener)
    time.sleep(sleep)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl, headers=httpheaders)
        if(httpmethod=="POST"):
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry a body/headers; keep processing.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    try:
        # Flatten the message object into a plain dict of header pairs.
        prehttpheaderout = geturls_text.headers
        httpheaderkeys = list(geturls_text.headers.keys())
        imax = len(httpheaderkeys)
        ic = 0
        httpheaderout = {}
        while(ic < imax):
            httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
            ic += 1
    except AttributeError:
        httpheaderout = geturls_text.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding")=="br"):
        # BUG FIX: the raw body must be read before brotli decompression;
        # previously returnval_content was referenced before assignment.
        returnval_content = geturls_text.read()[:]
        if(havebrotli):
            returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with urllib's Request/urlopen into a uniquely named
    temporary file; see download_from_url_file_with_httplib2 for the
    parameter and return-value contract.

    BUG FIXES vs. original: the result dict was never returned; elapsed
    time was computed as start-end (negative); a missing Last-Modified
    header raised KeyError during os.utime.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    install_opener(geturls_opener)
    time.sleep(sleep)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl, headers=httpheaders)
        if(httpmethod=="POST"):
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        # HTTP error responses still carry a readable body; keep going.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.getcode()
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    try:
        # Flatten the message object into a plain dict of header pairs.
        prehttpheaderout = geturls_text.headers
        httpheaderkeys = list(geturls_text.headers.keys())
        imax = len(httpheaderkeys)
        ic = 0
        httpheaderout = {}
        while(ic < imax):
            httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
            ic += 1
    except AttributeError:
        httpheaderout = geturls_text.headers
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # AttributeError: no parsedate_to_datetime on Python 2;
            # KeyError: Last-Modified may be absent (BUG FIX).
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())))
        except (AttributeError, KeyError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was negative before).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via urllib Request either to outpath/outfile, or —
    when outfile=="-" — into memory, returning the content in the result
    dict. buffersize is [download_chunk, copy_chunk].

    BUG FIXES vs. original: the result dict listed 'Method' twice (the
    first value was silently discarded); the in-memory branches read from
    an uninitialized buffer name; elapsed times were computed as
    start-end (negative).
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Propagate the server's Last-Modified time to the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())))
        except (AttributeError, KeyError):
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUG FIX: 'Method' appeared twice; keep the single httpmethod entry.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # In-memory sink: StringIO for Python-2 byte strings, BytesIO for Python 3.
        if(sys.version[0]=="2"):
            membuf = StringIO()
        else:
            membuf = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                membuf.write(databytes)
        membuf.seek(0)
        fdata = membuf.getvalue()
        membuf.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Download httpurl with the requests library and return the body plus
    response metadata as a dict, or False on connection failure.

    BUG FIXES vs. original: requests.exceptions.ConnectError does not
    exist (ConnectionError is the real class, so the handler itself raised
    AttributeError); compression was detected via 'Content-Type' instead
    of 'Content-Encoding'; gzip.GzipFile has no .content attribute (the
    decompressed body must come from .read()); a 'br'-encoded response
    referenced the content variable before assignment.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Inline URL credentials become an HTTP Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    # NOTE(review): requests usually decodes gzip/deflate itself; this
    # manual path is kept for parity with the sibling implementations.
    if(httpheaderout.get('Content-Encoding')=="gzip" or httpheaderout.get('Content-Encoding')=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content)
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get('Content-Encoding')!="gzip" and httpheaderout.get('Content-Encoding')!="deflate" and httpheaderout.get('Content-Encoding')!="br"):
        returnval_content = geturls_text.content[:]
    if(httpheaderout.get("Content-Encoding")=="br"):
        returnval_content = geturls_text.content[:]
        if(havebrotli):
            returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when requests is unavailable: delegate to the
        urllib implementation and return its result dict.

        BUG FIX: the delegate's result was assigned but never returned.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with the requests library into a uniquely named
    temporary file; see download_from_url_file_with_httplib2 for the
    parameter and return-value contract.

    BUG FIXES vs. original: int(Content-Length) was applied before the
    None check, raising TypeError whenever the header was absent;
    requests.exceptions.ConnectError does not exist (ConnectionError);
    httpheaders/httpcookie lacked the module defaults every sibling
    function has (added — backward-compatible); the result was never
    returned; elapsed time was start-end (negative).
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except requests.exceptions.ConnectionError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.url
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    # BUG FIX: only convert Content-Length when the header is present.
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # AttributeError: no parsedate_to_datetime on Python 2;
            # KeyError: Last-Modified may be absent.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())))
        except (AttributeError, KeyError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (was negative before).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when the requests module is unavailable.

        Delegates to the urllib implementation with identical arguments.
        BUGFIX: the delegated result is now returned to the caller instead
        of being discarded.
        """
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via requests and either move the result into
    outpath/outfile, or (when outfile is "-") return its bytes in memory.

    Returns a result dict on success, False on bad paths or a failed
    download.  buffersize is [download-chunk, copy-chunk]; it is read-only.
    NOTE: outpath's default is evaluated once at import time (module
    convention kept for interface compatibility).
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile == "-"):
        # Saving to a real file: validate target directory and file paths.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Mirror the server's Last-Modified time onto the saved file.
            # BUGFIX: also catch KeyError -- a missing Last-Modified header
            # previously escaped the handler and aborted the download.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())
            os.utime(filepath, (lastmod, lastmod))
        except (AttributeError, KeyError):
            pass
        exec_time_end = time.time()
        # BUGFIX: durations were computed as start-end (always negative).
        move_time = exec_time_end - exec_time_start
        log.info("It took "+hms_string(move_time)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict previously listed 'Method' twice (last one won);
        # the single surviving value (httpmethod) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(move_time), 'MoveFileTimeReadable': hms_string(move_time), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # Returning content in memory: copy the temp file into a buffer.
        # (Former separate Py2/Py3 branches merged; only the buffer type
        # differed.)
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0] == "2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    # Guarded: a zero Content-Length must not divide by zero.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: duration sign corrected (was start-end).
        copy_time = exec_time_end - exec_time_start
        log.info("It took "+hms_string(copy_time)+" to copy file.")
        # BUGFIX: 'HeadersSent' previously lost its value in the Python 2
        # branch (a bare ['HeadersSent'] literal) and 'Method' was duplicated.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(copy_time), 'MoveFileTimeReadable': hms_string(copy_time), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests module is unavailable.

        Delegates to the urllib implementation (note its differing argument
        order: buffersize before outfile/outpath).  BUGFIX: the result is
        now returned instead of being discarded.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httpx (HTTP/1.1 only) and return a dict with the
    decompressed body and response metadata, or False on connection failure.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
        if(httpmethod == "POST"):
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET is both the explicit method and the fallback.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    # BUGFIX: compression is signalled by Content-Encoding, not Content-Type,
    # and gzip.GzipFile has no .content attribute -- use .read().
    if(httpheaderout.get('Content-Encoding') == "gzip" or httpheaderout.get('Content-Encoding') == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.content)
        else:
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.content[:]
        if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
            returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fallback used when the httpx module is unavailable.

    Delegates to the urllib implementation with identical arguments.
    BUGFIX: the delegated result is now returned to the caller.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_httpx(httpurl, httpheaders, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httpx (HTTP/1.1) into a uniquely named
    temporary file.

    Returns a dict describing the temp file (Filename, Filesize, Headers,
    Version, Method, HeadersSent, URL, Code, DownloadTime*) or False on
    connection failure.  The caller is responsible for removing the file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the url, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
        if(httpmethod == "POST"):
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET is both the explicit method and the fallback.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    # BUGFIX: int(None) raised TypeError whenever Content-Length was absent;
    # the old "is None" check afterwards was unreachable.
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    else:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time on the temp file.
            # BUGFIX: also catch KeyError for a missing Last-Modified header.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except (AttributeError, KeyError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        # BUGFIX: httpx responses have no iter_content(); iter_bytes() is
        # the httpx streaming API (chunk_size supported since httpx 0.18).
        for databytes in geturls_text.iter_bytes(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guarded: unknown/zero Content-Length must not divide by 0.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUGFIX: duration was computed as start-end (always negative).
    download_time = exec_time_end - exec_time_start
    log.info("It took "+hms_string(download_time)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(download_time), 'DownloadTimeReadable': hms_string(download_time)})
    return returnval
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fallback used when the httpx module is unavailable.

    Delegates to the urllib implementation with identical arguments.
    BUGFIX: the delegated result is now returned to the caller.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via httpx (HTTP/1.1) and either move the result into
    outpath/outfile, or (when outfile is "-") return its bytes in memory.

    Returns a result dict on success, False on bad paths or a failed
    download.  buffersize is [download-chunk, copy-chunk]; it is read-only.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile == "-"):
        # Saving to a real file: validate target directory and file paths.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Mirror the server's Last-Modified time onto the saved file.
            # BUGFIX: also catch KeyError for a missing Last-Modified header.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())
            os.utime(filepath, (lastmod, lastmod))
        except (AttributeError, KeyError):
            pass
        exec_time_end = time.time()
        # BUGFIX: durations were computed as start-end (always negative).
        move_time = exec_time_end - exec_time_start
        log.info("It took "+hms_string(move_time)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: duplicate 'Method' key removed; the surviving value
        # (httpmethod) is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(move_time), 'MoveFileTimeReadable': hms_string(move_time), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # Returning content in memory: copy the temp file into a buffer.
        # (Former separate Py2/Py3 branches merged; only the buffer type
        # differed.)
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0] == "2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    # Guarded: a zero Content-Length must not divide by zero.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUGFIX: duration sign corrected (was start-end).
        copy_time = exec_time_end - exec_time_start
        log.info("It took "+hms_string(copy_time)+" to copy file.")
        # BUGFIX: 'HeadersSent' previously lost its value in the Python 2
        # branch (a bare ['HeadersSent'] literal) and 'Method' was duplicated.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(copy_time), 'MoveFileTimeReadable': hms_string(copy_time), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback used when the httpx module is unavailable.

    Delegates to the urllib implementation (note its differing argument
    order: buffersize before outfile/outpath).  BUGFIX: the result is
    now returned instead of being discarded.
    """
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httpx (HTTP/2 enabled) and return a dict with the
    decompressed body and response metadata, or False on connection failure.
    Identical to download_from_url_with_httpx except http2=True.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
        if(httpmethod == "POST"):
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET is both the explicit method and the fallback.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    # BUGFIX: compression is signalled by Content-Encoding, not Content-Type,
    # and gzip.GzipFile has no .content attribute -- use .read().
    if(httpheaderout.get('Content-Encoding') == "gzip" or httpheaderout.get('Content-Encoding') == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.content)
        else:
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.content[:]
        if(httpheaderout.get("Content-Encoding") == "br" and havebrotli):
            returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fallback used when the httpx module is unavailable.

    Delegates to the urllib implementation with identical arguments.
    BUGFIX: the delegated result is now returned to the caller.
    """
    return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_httpx2(httpurl, httpheaders, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httpx (HTTP/2 enabled) into a uniquely named
    temporary file.

    Returns a dict describing the temp file or False on connection failure.
    Identical to download_from_url_file_with_httpx except http2=True.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the url, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
        if(httpmethod == "POST"):
            geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET is both the explicit method and the fallback.
            geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except httpx.ConnectError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.status_code
    httpversionout = geturls_text.http_version
    httpmethodout = httpmethod
    httpurlout = str(geturls_text.url)
    httpheaderout = geturls_text.headers
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    # BUGFIX: int(None) raised TypeError whenever Content-Length was absent;
    # the old "is None" check afterwards was unreachable.
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    else:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time on the temp file.
            # BUGFIX: also catch KeyError for a missing Last-Modified header.
            lastmod = time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())
            os.utime(tmpfilename, (lastmod, lastmod))
        except (AttributeError, KeyError):
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        # BUGFIX: httpx responses have no iter_content(); iter_bytes() is
        # the httpx streaming API (chunk_size supported since httpx 0.18).
        for databytes in geturls_text.iter_bytes(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guarded: unknown/zero Content-Length must not divide by 0.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUGFIX: duration was computed as start-end (always negative).
    download_time = exec_time_end - exec_time_start
    log.info("It took "+hms_string(download_time)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(download_time), 'DownloadTimeReadable': hms_string(download_time)})
    return returnval
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Fallback used when the httpx module is unavailable.

    Delegates to the urllib implementation with identical arguments.
    BUGFIX: the delegated result is now returned to the caller.
    """
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl via the httpx (HTTP/2) backend and either move the
    result into outpath/outfile, or — when outfile is "-" — return the
    downloaded bytes in-memory.

    buffersize is a two-element list: [download chunk size, copy chunk
    size].  It is only read, never mutated, so the mutable default is safe.
    Returns a result dict on success and False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually an existing file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file name is taken by a directory.
            return False;
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified time on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())));
        except (AttributeError, KeyError, TypeError, ValueError):
            # No usable Last-Modified header; keep the current mtime.
            pass;
        exec_time_end = time.time();
        # BUG FIX: elapsed time was computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the dict had a duplicate 'Method' key; the second
        # (httpmethod) won, so only it is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # The former separate Python 2 / Python 3 branches differed only in
        # the in-memory buffer type, so they are merged here.  This also
        # fixes the py2 branch's 'HeadersSent': ['HeadersSent'] literal-list
        # bug and adds the pretmpfilename guard the py3 branch was missing.
        pretmpfilename = download_from_url_file_with_httpx2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        if(sys.version[0]=="2"):
            f = StringIO();
        else:
            f = BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard avoids ZeroDivisionError on empty files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        f.seek(0);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Fallback used when httpx (HTTP/2) is unavailable: delegate the
    download-to-file to the urllib implementation with the same arguments.

    Returns whatever download_from_url_to_file_with_urllib returns (a
    result dict on success, False on failure).
    """
    # BUG FIX: the result was assigned but never returned to the caller.
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep);
    return returnval;
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with httpcore (HTTP/1.1 only) and return a dict with the
    decoded body plus request/response metadata, or False on a connection
    error.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch previously issued a "GET" request.
            # NOTE(review): httpcore's request() may expect content= rather
            # than data= — confirm against the installed httpcore version.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
        else:
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    log.info("Downloading URL "+httpurl);
    # BUG FIX: compression was detected via Content-Type, which carries the
    # media type (e.g. "text/html"), never "gzip"; Content-Encoding is the
    # header that names the transfer compression (the br branch already
    # read it).
    if(httpheaderout.get('Content-Encoding')=="gzip" or httpheaderout.get('Content-Encoding')=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content);
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content);
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        # BUG FIX: GzipFile has no .content attribute; read() decompresses.
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get('Content-Encoding')!="gzip" and httpheaderout.get('Content-Encoding')!="deflate"):
        # Raw body; for "br" this is the compressed payload that the next
        # branch decompresses.
        returnval_content = geturls_text.content[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """
        Fallback used when httpcore is unavailable: delegate to the urllib
        implementation with the same arguments.
        """
        # BUG FIX: the result was assigned but never returned to the caller.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep);
        return returnval;
def download_from_url_file_with_httpcore(httpurl, httpheaders, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Fetch httpurl with httpcore (HTTP/1.1 only) into a uniquely named
    temporary file and return a dict describing it, or False on a
    connection error.  The caller is responsible for removing the
    temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch previously issued a "GET" request.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
        else:
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    # BUG FIX: int(httpheaderout.get('Content-Length')) raised TypeError
    # when the header was absent, making the None checks below dead code;
    # convert only after the None check.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())));
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing/unparsable Last-Modified header; keep current mtime.
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                # Guard avoids ZeroDivisionError when no Content-Length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUG FIX: elapsed time was computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """
        Fallback used when httpcore is unavailable: delegate the
        file-download to the urllib implementation with the same arguments.
        """
        # BUG FIX: the result was assigned but never returned to the caller.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep);
        return returnval;
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl via the httpcore (HTTP/1.1) backend and either move
    the result into outpath/outfile, or — when outfile is "-" — return the
    downloaded bytes in-memory.

    buffersize is a two-element list: [download chunk size, copy chunk
    size].  It is only read, never mutated, so the mutable default is safe.
    Returns a result dict on success and False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually an existing file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file name is taken by a directory.
            return False;
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified time on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())));
        except (AttributeError, KeyError, TypeError, ValueError):
            # No usable Last-Modified header; keep the current mtime.
            pass;
        exec_time_end = time.time();
        # BUG FIX: elapsed time was computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the dict had a duplicate 'Method' key; the second
        # (httpmethod) won, so only it is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # Merged former py2/py3 branches (they differed only in buffer
        # type); also fixes the py2 branch's 'HeadersSent': ['HeadersSent']
        # bug and adds the pretmpfilename guard the py3 branch lacked.
        pretmpfilename = download_from_url_file_with_httpcore(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        if(sys.version[0]=="2"):
            f = StringIO();
        else:
            f = BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard avoids ZeroDivisionError on empty files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        f.seek(0);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(not havehttpcore):
    def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """
        Fallback used when httpcore is unavailable: delegate the
        download-to-file to the urllib implementation with the same
        arguments.
        """
        # BUG FIX: the result was assigned but never returned to the caller.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep);
        return returnval;
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """
    Fetch httpurl with httpcore with HTTP/2 enabled and return a dict with
    the decoded body plus request/response metadata, or False on a
    connection error.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch previously issued a "GET" request.
            # NOTE(review): httpcore's request() may expect content= rather
            # than data= — confirm against the installed httpcore version.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
        else:
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    log.info("Downloading URL "+httpurl);
    # BUG FIX: compression was detected via Content-Type, which carries the
    # media type (e.g. "text/html"), never "gzip"; Content-Encoding is the
    # header that names the transfer compression (the br branch already
    # read it).
    if(httpheaderout.get('Content-Encoding')=="gzip" or httpheaderout.get('Content-Encoding')=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content);
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content);
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        # BUG FIX: GzipFile has no .content attribute; read() decompresses.
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get('Content-Encoding')!="gzip" and httpheaderout.get('Content-Encoding')!="deflate"):
        # Raw body; for "br" this is the compressed payload that the next
        # branch decompresses.
        returnval_content = geturls_text.content[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
if(not havehttpcore):
    def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """
        Fallback used when httpcore is unavailable: delegate to the urllib
        implementation with the same arguments.
        """
        # BUG FIX: the result was assigned but never returned to the caller.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep);
        return returnval;
def download_from_url_file_with_httpcore2(httpurl, httpheaders, httpcookie, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """
    Fetch httpurl with httpcore with HTTP/2 enabled into a uniquely named
    temporary file and return a dict describing it, or False on a
    connection error.  The caller is responsible for removing the
    temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline URL credentials become a Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch previously issued a "GET" request.
            geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders);
        else:
            geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
    except httpcore.ConnectTimeout:
        log.info("Error With URL "+httpurl);
        return False;
    except httpcore.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = str(httpurl);
    httpheaderout = geturls_text.headers;
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    # BUG FIX: int(httpheaderout.get('Content-Length')) raised TypeError
    # when the header was absent, making the None checks below dead code;
    # convert only after the None check.
    downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())));
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing/unparsable Last-Modified header; keep current mtime.
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            if(downloadsize>0):
                # Guard avoids ZeroDivisionError when no Content-Length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUG FIX: elapsed time was computed as start - end (negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttpcore):
    def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """
        Fallback used when httpcore is unavailable: delegate the
        file-download to the urllib implementation with the same arguments.
        """
        # BUG FIX: the result was assigned but never returned to the caller.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep);
        return returnval;
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl via the httpcore HTTP/2 backend and either move the
    result into outpath/outfile, or — when outfile is "-" — return the
    downloaded bytes in-memory.

    buffersize is a two-element list: [download chunk size, copy chunk
    size].  It is only read, never mutated, so the mutable default is safe.
    Returns a result dict on success and False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    returnval = False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually an existing file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file name is taken by a directory.
            return False;
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified time on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())));
        except (AttributeError, KeyError, TypeError, ValueError):
            # No usable Last-Modified header; keep the current mtime.
            pass;
        exec_time_end = time.time();
        # BUG FIX: elapsed time was computed as start - end (negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # BUG FIX: the dict had a duplicate 'Method' key; the second
        # (httpmethod) won, so only it is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # Merged former py2/py3 branches (they differed only in buffer
        # type); also fixes the py2 branch's 'HeadersSent': ['HeadersSent']
        # bug and adds the pretmpfilename guard the py3 branch lacked.
        pretmpfilename = download_from_url_file_with_httpcore2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        if(sys.version[0]=="2"):
            f = StringIO();
        else:
            f = BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    # Guard avoids ZeroDivisionError on empty files.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        f.seek(0);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback used when httpcore is unavailable: delegate the whole
    download-to-file operation to the plain urllib implementation.

    Same signature and return value as the httpcore-backed version, so
    callers do not need to know which backend is active.
    """
    returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep);
    # The delegated result must be propagated; without this the caller
    # always saw None.
    return returnval;
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl through a urllib3 PoolManager and return the decoded
    body plus response metadata as a dict, or False on a connection error.

    httpmethod may be "GET" or "POST" (anything else falls back to GET).
    postdata is urlencoded when it is not already a dict.  A negative
    sleep means "use the module-wide geturls_download_sleep delay".
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Embedded userinfo in the URL becomes an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    urllib_pool = urllib3.PoolManager(headers=httpheaders);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        elif(httpmethod=="POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        # gzip/deflate bodies are decompressed through a GzipFile wrapper.
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the plain
        urllib implementation with an identical signature."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep);
        # Propagate the delegated result; the original dropped it.
        return returnval;
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl through urllib3 into a uniquely named temporary
    file and return a dict describing it (Filename, Filesize, Headers,
    timing, ...), or False on a connection error.

    The temporary file is NOT deleted here; the caller owns it.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    urllib_pool = urllib3.PoolManager(headers=httpheaders);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        elif(httpmethod=="POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    # Content-Length may be absent: fetch it first, then convert, instead
    # of int(...get(...)) which raised TypeError on a missing header.
    downloadsize = geturls_text.headers.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified time onto the temp file;
            # AttributeError means the header was absent/unparsable.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())));
        except AttributeError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard the progress percentage against an unknown (0) total size.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
        f.close();
    geturls_text.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the file download
        to the plain urllib implementation with an identical signature."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep);
        # Propagate the delegated result; the original dropped it.
        return returnval;
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via urllib3 either to outpath/outfile, or — when
    outfile is "-" — into an in-memory buffer.

    buffersize is a two-element list: [0] is the network read size, [1]
    the local copy size.  Returns a result dict, or False when the
    destination is invalid or the download failed.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory.
            return False;
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())));
        except AttributeError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # Note: the original dict listed 'Method' twice; only the second
        # (httpmethod) survived, so a single entry preserves behavior.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]=="2"):
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]>="3"):
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        # Robustness: the py2 branch checks for a failed download; do the
        # same here instead of crashing on pretmpfilename['Filename'].
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the download-to-file
        operation to the plain urllib implementation."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep);
        # Propagate the delegated result; the original dropped it.
        return returnval;
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl through urllib3's PoolManager.urlopen and return the
    decoded body plus response metadata as a dict, or False on a
    connection error.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Embedded userinfo in the URL becomes an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    urllib_pool = urllib3.PoolManager(headers=httpheaders);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
        elif(httpmethod=="POST"):
            # Bug fix: the POST branch previously issued a "GET" verb.
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    log.info("Downloading URL "+httpurl);
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        # gzip/deflate bodies are decompressed through a GzipFile wrapper.
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:];
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
    geturls_text.close();
    return returnval;
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate to the plain
        urllib implementation with an identical signature."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep);
        # Propagate the delegated result; the original dropped it.
        return returnval;
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl through urllib3 (PoolManager.urlopen) into a
    uniquely named temporary file; return a dict describing it, or False
    on a connection error.  The caller owns (and must delete) the file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    time.sleep(sleep);
    urllib_pool = urllib3.PoolManager(headers=httpheaders);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
        elif(httpmethod=="POST"):
            # Bug fix: the POST branch previously issued a "GET" verb.
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.ConnectError:
        log.info("Error With URL "+httpurl);
        return False;
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    httpcodeout = geturls_text.status;
    httpversionout = "1.1";
    httpmethodout = httpmethod;
    httpurlout = geturls_text.geturl();
    httpheaderout = geturls_text.info();
    httpheadersentout = httpheaders;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    # Content-Length may be absent: fetch it first, then convert, instead
    # of int(...get(...)) which raised TypeError on a missing header.
    downloadsize = geturls_text.headers.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        try:
            # Mirror the server's Last-Modified time onto the temp file;
            # AttributeError means the header was absent/unparsable.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())));
        except AttributeError:
            pass;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard the progress percentage against an unknown (0) total size.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
        f.close();
    geturls_text.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)});
    return returnval;
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the file download
        to the plain urllib implementation with an identical signature."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep);
        # Propagate the delegated result; the original dropped it.
        return returnval;
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via urllib3 either to outpath/outfile, or — when
    outfile is "-" — into an in-memory buffer.

    buffersize is a two-element list: [0] is the network read size, [1]
    the local copy size.  Returns a result dict, or False when the
    destination is invalid or the download failed.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file.
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory.
            return False;
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            # Preserve the server's Last-Modified timestamp on the moved file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())));
        except AttributeError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        # Note: the original dict listed 'Method' twice; only the second
        # (httpmethod) survived, so a single entry preserves behavior.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]=="2"):
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-" and sys.version[0]>="3"):
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        # Robustness: the py2 branch checks for a failed download; do the
        # same here instead of crashing on pretmpfilename['Filename'].
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
            ft.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is unavailable: delegate the download-to-file
        operation to the plain urllib implementation."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep);
        # Propagate the delegated result; the original dropped it.
        return returnval;
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with a mechanize.Browser and return the body in memory.

    Returns a dict with keys Type/Content/Headers/Version/Method/HeadersSent/
    URL/Code, or False when the request fails with URLError or a timeout.
    gzip/deflate responses are decompressed; "br" only when havebrotli.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Inline userinfo (user:pass@host) is turned into a Basic auth header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = mechanize.Browser()
    # mechanize expects headers as a list of (name, value) tuples.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # HTTP error responses still carry a body; keep the response object.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    log.info("Downloading URL "+httpurl)
    if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
        returnval_content = geturls_text.read()[:]
    if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
        # BUGFIX: the brotli branch decompressed returnval_content before it
        # was ever assigned (NameError); read the raw body first.
        returnval_content = geturls_text.read()[:]
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when mechanize is unavailable: delegate to urllib.

        BUGFIX: the delegated result was assigned but never returned.
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with mechanize into a uniquely named temporary file.

    Returns a dict describing the temp file (Filename, Filesize, Headers,
    status, timings) or False on URLError/timeout.  The caller owns the
    temporary file and must remove it.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Inline userinfo becomes a Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    httpcodeout = geturls_text.code
    httpversionout = "1.1"
    httpmethodout = httpmethod
    httpurlout = geturls_text.geturl()
    httpheaderout = geturls_text.info()
    httpheadersentout = httpheaders
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    # BUGFIX: int(httpheaderout.get('Content-Length')) raised TypeError
    # whenever the server omitted the header (int(None)); convert only
    # after the None check so the downloadsize=0 default can apply.
    downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        try:
            # Mirror the server's Last-Modified time onto the temp file.
            os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout['Last-Modified']).timetuple())))
        except (AttributeError, KeyError, TypeError, ValueError):
            # Header missing or unparsable; keep the file's current mtime.
            pass
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when mechanize is unavailable: delegate to urllib.

        BUGFIX: the delegated result was assigned but never returned.
        """
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with mechanize to outpath/outfile, or return the
    content in memory when outfile is "-".

    Returns a result dict ('Type': "File" or "Content") or False on error.
    buffersize is [download-chunk, copy-chunk].
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # Save to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        try:
            # Preserve the server's Last-Modified time on the final file.
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename['Headers']['Last-Modified']).timetuple())))
        except (AttributeError, KeyError, TypeError, ValueError):
            pass
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict literal listed 'Method' twice; only the second
        # value (httpmethod) ever survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        return returnval
    if(outfile=="-"):
        # In-memory mode: download to a temp file, copy into a buffer.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3 (file is read binary);
        # merging the two version branches removes ~40 duplicated lines.
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed, and the Python 3 branch's
        # 'HeadersSent': ['HeadersSent'] (a bare list literal) corrected to
        # pretmpfilename['HeadersSent'].
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        return returnval
    return False
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when mechanize is unavailable: delegate to urllib.

        BUGFIX: the delegated result was assigned but never returned.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_file_from_ftp_file(url):
    """Fetch an ftp:// or ftps:// url and return its body as a BytesIO,
    or False when the scheme is unsupported or the connection fails."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUGFIX: the handlers logged the undefined name 'httpurl'
        # (the parameter is 'url'), raising NameError on the error path.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # NOTE(review): ftp_username/ftp_password are computed above but login
    # still uses the raw URL values (ftplib itself defaults a falsy user to
    # "anonymous"); left unchanged to preserve the wire behavior — confirm.
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Return the contents of the FTP url as bytes, or False on failure."""
    ftpfile = download_file_from_ftp_file(url)
    # BUGFIX: download_file_from_ftp_file returns False on failure;
    # calling False.read() raised AttributeError.
    if(not ftpfile):
        return False
    return ftpfile.read()
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch an FTP URL and return its body wrapped in the same result-dict
    shape used by the HTTP downloaders; header/version/code fields are None
    because FTP has no equivalents.  Returns False on failure."""
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    # Normalize headers through both converters, matching the HTTP paths
    # even though FTP sends no headers.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download an FTP URL into a uniquely named temporary file and return
    a result dict describing it, or False on failure.  Caller removes the
    temp file."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from a SHA-1 of URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_ftp_file(httpurl)
    if(not geturls_text):
        return False
    # Size of the in-memory FTP payload: seek to end, tell, rewind.
    geturls_text.seek(0, 2)
    downloadsize = geturls_text.tell()
    geturls_text.seek(0, 0)
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download an FTP URL to outpath/outfile, or return the content in
    memory when outfile is "-".

    Returns a result dict ('Type': "File" or "Content") or False on error.
    buffersize is [download-chunk, copy-chunk].
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # Save to a real file: download to a temp file, then move into place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict literal listed 'Method' twice
        # ('Method': pretmpfilename['Method'], 'Method': None); only the
        # second value (None) ever survived, so keep that one.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        return returnval
    if(outfile=="-"):
        # In-memory mode: download to a temp file, copy into a buffer.
        pretmpfilename = download_from_url_file_with_ftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # StringIO on Python 2, BytesIO on Python 3 (file is read binary);
        # merging the version branches removes duplicated copy loops.
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        # BUGFIX: duplicate 'Method' key removed; None survives as before.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': None, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
        return returnval
    return False
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to an ftp:// or ftps:// url.

    Returns the (rewound) ftpfile on success, False on unsupported scheme
    or connection failure.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUGFIX: handlers logged the undefined name 'httpurl' (the
        # parameter is 'url'), raising NameError on the error path.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    # NOTE(review): ftp_username/ftp_password are computed but unused, as in
    # the download counterpart; login keeps the raw URL values — confirm.
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the bytes in ftpstring to url via FTP.

    Wraps the payload in a BytesIO and delegates to upload_file_to_ftp_file;
    returns that function's result (the buffer on success, False on failure).
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch an sftp:// url with paramiko and return its body as a BytesIO,
    or False on unsupported scheme or connection failure."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): connect uses the raw URL credentials, not the
        # sftp_username/sftp_password computed above (paramiko falls back to
        # the local user when username is None); left unchanged — confirm.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: handlers logged the undefined name 'httpurl' (the
        # parameter is 'url'), raising NameError on the error path.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
4009 def download_file_from_sftp_file(url
):
def download_file_from_sftp_string(url):
    """Return the contents of the SFTP url as bytes, or False on failure."""
    sftpfile = download_file_from_sftp_file(url)
    # BUGFIX: download_file_from_sftp_file returns False on failure;
    # calling False.read() raised AttributeError.
    if(not sftpfile):
        return False
    return sftpfile.read()
4017 def download_file_from_ftp_string(url
):
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch an SFTP URL and return its body in the common result-dict
    shape; header/version/method/code fields are None since SFTP has no
    equivalents.  Returns False on failure."""
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    # Run headers through both converters for parity with the HTTP paths,
    # even though SFTP sends none.
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    log.info("Downloading URL "+httpurl)
    returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
    geturls_text.close()
    return returnval
if(not haveparamiko):
    # Without paramiko there is no SFTP transport at all; the fallback
    # simply reports failure.
    def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when paramiko is unavailable: SFTP is unsupported."""
        return False
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download an SFTP URL into a uniquely named temporary file and return
    a result dict describing it, or False on failure.  Caller removes the
    temp file."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Unique temp-file suffix from a SHA-1 of URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_text = download_file_from_sftp_file(httpurl)
    if(not geturls_text):
        return False
    # Size of the in-memory payload: seek to end, tell, rewind.
    geturls_text.seek(0, 2)
    downloadsize = geturls_text.tell()
    geturls_text.seek(0, 0)
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveparamiko):
    # Without paramiko there is no SFTP transport at all; the fallback
    # simply reports failure.
    def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when paramiko is unavailable: SFTP is unsupported."""
        return False
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """
    Download httpurl over SFTP and either move the result to outpath/outfile
    (when outfile != "-") or return its bytes in memory (when outfile == "-").

    Returns a dict describing the downloaded file or content, or False on
    failure (bad paths, or the underlying SFTP download failed).

    NOTE(review): the defaults (geturls_headers, geturls_cj, os.getcwd(), the
    mutable buffersize list) are evaluated once at import time; preserved
    as-is to keep the original interface.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep

    def _copy_tmpfile_into(tmpfilename, membuf):
        # Read the downloaded temp file back in buffersize[1] chunks, logging
        # progress, and return its full contents via the given memory buffer
        # (StringIO on Python 2, BytesIO on Python 3).
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                # Guard against zero-length files (the original divided
                # unconditionally, risking ZeroDivisionError).
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                else:
                    percentage = "100%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                membuf.write(databytes)
        membuf.seek(0)
        fdata = membuf.getvalue()
        membuf.close()
        return fdata

    returnval = False
    if(not outfile == "-"):
        # --- Download to a real file on disk ---
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start; the original computed
        # start - end, producing a negative duration.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the original dict literal contained 'Method' twice; the
        # second occurrence ('Method': None) silently overwrote the real value.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # --- Download into memory and return the content inline ---
        # The original duplicated this whole branch for Python 2 (StringIO)
        # and Python 3 (BytesIO); folded into one branch via _copy_tmpfile_into.
        pretmpfilename = download_from_url_file_with_sftp(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUGFIX: the Python-3 path lacked this failure check (the Python-2
        # path had it), so a failed download crashed on False['Filename'].
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        exec_time_start = time.time()
        if(sys.version[0] == "2"):
            fdata = _copy_tmpfile_into(tmpfilename, StringIO())
        else:
            fdata = _copy_tmpfile_into(tmpfilename, BytesIO())
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haveparamiko):
    # Fallback stub used when paramiko could not be imported: keeps the public
    # API present so callers get a clean False instead of a NameError.
    # NOTE(review): the stub body was truncated in this copy; restored to the
    # file's standard haveparamiko-fallback pattern.
    def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Stub: SFTP downloads are unavailable without paramiko; always returns False."""
        return False
def upload_file_to_sftp_file(sftpfile, url):
    """
    Upload the file-like object sftpfile to the path named by an sftp:// URL.

    Returns the rewound file object on success, or False on failure (wrong
    URL scheme, SSH/DNS/timeout errors).
    """
    urlparts = urlparse.urlparse(url)
    # NOTE(review): file_name/file_dir are computed but unused, as in the
    # original; kept for parity with the sibling FTP helpers.
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    # Only sftp:// URLs are handled here.
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    if(urlparts.scheme != "sftp"):
        return False
    # Default to the standard SSH port when the URL does not carry one.
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): connects with the raw urlparts credentials, not the
        # sftp_username/sftp_password fallbacks computed above — looks like
        # those were intended here; preserved as-is to avoid changing auth
        # behavior. TODO confirm against the FTP upload helpers.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: the original logged the undefined name 'httpurl' here
        # (this function's parameter is 'url'), raising NameError instead
        # of reporting the failure.
        log.info("Error With URL "+url)
        return False
    except socket.timeout:
        log.info("Error With URL "+url)
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    # Rewind so the caller can reuse the uploaded file object.
    sftpfile.seek(0, 0)
    return sftpfile
4242 def upload_file_to_sftp_file(sftpfile
, url
):
def upload_file_to_sftp_string(sftpstring, url):
    """
    Upload a byte string to the path named by an sftp:// URL.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_sftp_file;
    returns its result (the rewound file object on success, False on failure).
    """
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: the original called the nonexistent upload_file_to_sftp_files
    # with the undefined name ftpfileo — a guaranteed NameError.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    return sftpfile
4252 def upload_file_to_sftp_string(url
):