This program is free software; you can redistribute it and/or modify
it under the terms of the Revised BSD License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Revised BSD License for more details.

Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

$FileInfo: pywwwget.py - Last Update: 9/24/2023 Ver. 1.5.0 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
55 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
83 if(sys
.version
[0]=="2"):
85 from cStringIO
import StringIO
;
87 from StringIO
import StringIO
;
88 # From http://python-future.org/compatible_idioms.html
89 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
90 from urllib
import urlencode
;
91 from urllib
import urlopen
as urlopenalt
;
92 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
93 import urlparse
, cookielib
;
94 from httplib
import HTTPConnection
, HTTPSConnection
;
95 if(sys
.version
[0]>="3"):
96 from io
import StringIO
, BytesIO
;
97 # From http://python-future.org/compatible_idioms.html
98 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
99 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
100 from urllib
.error
import HTTPError
, URLError
;
101 import urllib
.parse
as urlparse
;
102 import http
.cookiejar
as cookielib
;
103 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Program/project identity metadata used in User-Agent strings and headers.
__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
# (major, minor, micro, release tag, release number); tag/number may be None.
__version_info__ = (1, 5, 0, "RC 1", 1);
__version_date_info__ = (2023, 9, 24, "RC 1", 1);
# Dotted release date, e.g. "2023.09.24".
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
# Append the release number to the dotted date only for pre-releases.
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__;
# Human-readable version string; the release tag is appended when present.
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
# Prefix for temporary file names, e.g. "py3wwwget1-".
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
# System temporary directory used for downloaded files.
pytempdir = tempfile.gettempdir();
# Interpreter bitness as a string ("32bit" or "64bit").
# platform.architecture() returns a (bits, linkage) TUPLE; the original code
# bound the whole tuple, so the PyBitness=="32bit"/"64bit" comparisons just
# below could never match.  Take the first element instead.
PyBitness = platform.architecture()[0];
129 if(PyBitness
=="32bit" or PyBitness
=="32"):
131 elif(PyBitness
=="64bit" or PyBitness
=="64"):
136 compression_supported
= "gzip, deflate";
138 compression_supported
= "gzip, deflate, br";
140 compression_supported
= "gzip, deflate";
# Shared cookie jar reused across urllib-based requests in this module.
geturls_cj = cookielib.CookieJar();
# Windows user-agent platform tokens and matching Client Hints header sets.
# Bug fix: the OS-version entry must use the distinct header name
# 'SEC-CH-UA-PLATFORM-VERSION'; the original dict literals repeated
# 'SEC-CH-UA-PLATFORM', so the "Windows" platform value was silently
# overwritten by the version string (Python keeps the last duplicate key).
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# Version corrected to 5.2.0 to match the NT 5.2 UA string (was a 5.1.0 copy-paste).
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Browser User-Agent strings, all built on the Windows 7 platform token.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# Identifies this tool itself; fields come from the module metadata above.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Interpreter name for Client Hints; falls back to "Python" when
# platform.python_implementation() reports an empty string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used when a caller does not pick a specific one.
geturls_ua = geturls_ua_firefox_windows7;
# Per-browser default HTTP header sets; each pairs its User-Agent with
# shared Accept/Language/Charset values.  Chromium-family entries also carry
# Client Hints (SEC-CH-UA*) and merge in the Windows 7 platform hint dict.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Header sets for this tool's own User-Agent strings.  Bug fix: the
# version hint must use the distinct 'SEC-CH-UA-PLATFORM-VERSION' name;
# the original dict literals repeated 'SEC-CH-UA-PLATFORM', so the
# interpreter-name value was silently overwritten by the version string.
# NOTE(review): 'SEC-CH-UA-PLATFORM' here carries the Python implementation
# name rather than an OS name — kept as-is to preserve existing behavior.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
# Bot-style header sets; these request no compression ('Accept-Encoding': "none").
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Module-wide defaults: header set used when callers pass none, and the
# pause in seconds between downloads (0 = no sleep).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
200 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
201 if(outtype
=="print" and dbgenable
):
204 elif(outtype
=="log" and dbgenable
):
205 logging
.info(dbgtxt
);
207 elif(outtype
=="warning" and dbgenable
):
208 logging
.warning(dbgtxt
);
210 elif(outtype
=="error" and dbgenable
):
211 logging
.error(dbgtxt
);
213 elif(outtype
=="critical" and dbgenable
):
214 logging
.critical(dbgtxt
);
216 elif(outtype
=="exception" and dbgenable
):
217 logging
.exception(dbgtxt
);
219 elif(outtype
=="logalt" and dbgenable
):
220 logging
.log(dgblevel
, dbgtxt
);
222 elif(outtype
=="debug" and dbgenable
):
223 logging
.debug(dbgtxt
);
231 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
232 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
237 def add_url_param(url
, **params
):
239 parts
= list(urlparse
.urlsplit(url
));
240 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
242 parts
[n
]=urlencode(d
);
243 return urlparse
.urlunsplit(parts
);
# Make executables that sit next to this script, or in the current working
# directory, discoverable by which_exec() by appending both to PATH.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search the directories listed in the PATH environment variable for
    *execfile* and return its full path, or None when it is not found.

    Fixed to use os.pathsep and os.path.join instead of the hard-coded
    ":" and "/", so the lookup also works on platforms (e.g. Windows)
    where ":" is not the PATH separator; this matches the os.pathsep
    usage in the PATH augmentation just above.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    # Explicit None instead of falling off the end of the loop.
    return None
251 def listize(varlist
):
259 newlistreg
.update({ilx
: varlist
[il
]});
260 newlistrev
.update({varlist
[il
]: ilx
});
263 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
266 def twolistize(varlist
):
276 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
277 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
278 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
279 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
282 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
283 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
284 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
287 def arglistize(proexec
, *varlist
):
291 newarglist
= [proexec
];
293 if varlist
[il
][0] is not None:
294 newarglist
.append(varlist
[il
][0]);
295 if varlist
[il
][1] is not None:
296 newarglist
.append(varlist
[il
][1]);
300 def fix_header_names(header_dict
):
301 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Render an elapsed time in seconds as an "H:MM:SS.ss" clock string."""
    whole_hours = int(sec_elapsed / (60 * 60))
    whole_minutes = int((sec_elapsed % (60 * 60)) / 60)
    leftover_secs = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(whole_hours, whole_minutes, leftover_secs)
312 # get_readable_size by Lipis
313 # http://stackoverflow.com/posts/14998888/revisions
314 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
316 if(unit
!="IEC" and unit
!="SI"):
319 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
320 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
323 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
324 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
329 if abs(bytes
) < unitsize
:
330 strformat
= "%3."+str(precision
)+"f%s";
331 pre_return_val
= (strformat
% (bytes
, unit
));
332 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
333 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
334 alt_return_val
= pre_return_val
.split();
335 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
338 strformat
= "%."+str(precision
)+"f%s";
339 pre_return_val
= (strformat
% (bytes
, "YiB"));
340 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
341 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
342 alt_return_val
= pre_return_val
.split();
343 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
346 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
348 usehashtypes
= usehashtypes
.lower();
349 getfilesize
= os
.path
.getsize(infile
);
350 return_val
= get_readable_size(getfilesize
, precision
, unit
);
352 hashtypelist
= usehashtypes
.split(",");
353 openfile
= open(infile
, "rb");
354 filecontents
= openfile
.read();
357 listnumend
= len(hashtypelist
);
358 while(listnumcount
< listnumend
):
359 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
360 hashtypelistup
= hashtypelistlow
.upper();
361 filehash
= hashlib
.new(hashtypelistup
);
362 filehash
.update(filecontents
);
363 filegethash
= filehash
.hexdigest();
364 return_val
.update({hashtypelistup
: filegethash
});
368 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
370 usehashtypes
= usehashtypes
.lower();
371 getfilesize
= len(instring
);
372 return_val
= get_readable_size(getfilesize
, precision
, unit
);
374 hashtypelist
= usehashtypes
.split(",");
376 listnumend
= len(hashtypelist
);
377 while(listnumcount
< listnumend
):
378 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
379 hashtypelistup
= hashtypelistlow
.upper();
380 filehash
= hashlib
.new(hashtypelistup
);
381 if(sys
.version
[0]=="2"):
382 filehash
.update(instring
);
383 if(sys
.version
[0]>="3"):
384 filehash
.update(instring
.encode('utf-8'));
385 filegethash
= filehash
.hexdigest();
386 return_val
.update({hashtypelistup
: filegethash
});
390 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
391 if isinstance(headers
, dict):
393 if(sys
.version
[0]=="2"):
394 for headkey
, headvalue
in headers
.iteritems():
395 returnval
.append((headkey
, headvalue
));
396 if(sys
.version
[0]>="3"):
397 for headkey
, headvalue
in headers
.items():
398 returnval
.append((headkey
, headvalue
));
399 elif isinstance(headers
, list):
405 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
406 if isinstance(headers
, dict):
408 if(sys
.version
[0]=="2"):
409 for headkey
, headvalue
in headers
.iteritems():
410 returnval
.append(headkey
+": "+headvalue
);
411 if(sys
.version
[0]>="3"):
412 for headkey
, headvalue
in headers
.items():
413 returnval
.append(headkey
+": "+headvalue
);
414 elif isinstance(headers
, list):
420 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
421 if isinstance(headers
, list):
426 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
428 elif isinstance(headers
, dict):
434 def get_httplib_support(checkvalue
=None):
435 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
437 returnval
.append("ftp");
438 returnval
.append("httplib");
440 returnval
.append("httplib2");
441 returnval
.append("urllib");
443 returnval
.append("urllib3");
444 returnval
.append("request3");
445 returnval
.append("request");
447 returnval
.append("requests");
449 returnval
.append("httpx");
450 returnval
.append("httpx2");
452 returnval
.append("mechanize");
454 returnval
.append("sftp");
456 returnval
.append("pysftp");
457 if(not checkvalue
is None):
458 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
459 checkvalue
= "urllib";
460 if(checkvalue
=="httplib1"):
461 checkvalue
= "httplib";
462 if(checkvalue
in returnval
):
468 def check_httplib_support(checkvalue
="urllib"):
469 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
470 checkvalue
= "urllib";
471 if(checkvalue
=="httplib1"):
472 checkvalue
= "httplib";
473 returnval
= get_httplib_support(checkvalue
);
476 def get_httplib_support_list():
477 returnval
= get_httplib_support(None);
480 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
481 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
483 sleep
= geturls_download_sleep
;
484 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
485 httplibuse
= "urllib";
486 if(httplibuse
=="httplib1"):
487 httplibuse
= "httplib";
488 if(not haverequests
and httplibuse
=="requests"):
489 httplibuse
= "urllib";
490 if(not havehttpx
and httplibuse
=="httpx"):
491 httplibuse
= "urllib";
492 if(not havehttpx
and httplibuse
=="httpx2"):
493 httplibuse
= "urllib";
494 if(not havehttpcore
and httplibuse
=="httpcore"):
495 httplibuse
= "urllib";
496 if(not havehttpcore
and httplibuse
=="httpcore2"):
497 httplibuse
= "urllib";
498 if(not havemechanize
and httplibuse
=="mechanize"):
499 httplibuse
= "urllib";
500 if(not havehttplib2
and httplibuse
=="httplib2"):
501 httplibuse
= "httplib";
502 if(not haveparamiko
and httplibuse
=="sftp"):
504 if(not havepysftp
and httplibuse
=="pysftp"):
506 urlparts
= urlparse
.urlparse(httpurl
);
507 if(isinstance(httpheaders
, list)):
508 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
509 httpheaders
= fix_header_names(httpheaders
);
510 if(httpuseragent
is not None):
511 if('User-Agent' in httpheaders
):
512 httpheaders
['User-Agent'] = httpuseragent
;
514 httpuseragent
.update({'User-Agent': httpuseragent
});
515 if(httpreferer
is not None):
516 if('Referer' in httpheaders
):
517 httpheaders
['Referer'] = httpreferer
;
519 httpuseragent
.update({'Referer': httpreferer
});
520 if(urlparts
.username
is not None or urlparts
.password
is not None):
521 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
522 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
523 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
524 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="mechanize" or httplibuse
=="mechanize"):
525 if(isinstance(httpheaders
, dict)):
526 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
527 geturls_opener
.addheaders
= httpheaders
;
529 if(postdata
is not None and not isinstance(postdata
, dict)):
530 postdata
= urlencode(postdata
);
531 if(httplibuse
=="urllib"):
533 if(httpmethod
=="GET"):
534 geturls_text
= geturls_opener
.open(httpurl
);
535 elif(httpmethod
=="POST"):
536 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
538 geturls_text
= geturls_opener
.open(httpurl
);
539 except HTTPError
as geturls_text_error
:
540 geturls_text
= geturls_text_error
;
541 log
.info("Error With URL "+httpurl
);
543 log
.info("Error With URL "+httpurl
);
545 except socket
.timeout
:
546 log
.info("Error With URL "+httpurl
);
548 httpcodeout
= geturls_text
.getcode();
549 httpversionout
= "1.1";
550 httpmethodout
= httpmethod
;
551 httpurlout
= geturls_text
.geturl();
552 httpheaderout
= geturls_text
.info();
553 httpheadersentout
= httpheaders
;
554 elif(httplibuse
=="request"):
556 if(httpmethod
=="GET"):
557 geturls_request
= Request(httpurl
, headers
=httpheaders
);
558 geturls_text
= urlopen(geturls_request
);
559 elif(httpmethod
=="POST"):
560 geturls_request
= Request(httpurl
, headers
=httpheaders
);
561 geturls_text
= urlopen(geturls_request
, data
=postdata
);
563 geturls_request
= Request(httpurl
, headers
=httpheaders
);
564 geturls_text
= urlopen(geturls_request
);
565 except HTTPError
as geturls_text_error
:
566 geturls_text
= geturls_text_error
;
567 log
.info("Error With URL "+httpurl
);
569 log
.info("Error With URL "+httpurl
);
571 except socket
.timeout
:
572 log
.info("Error With URL "+httpurl
);
574 httpcodeout
= geturls_text
.getcode();
575 httpversionout
= "1.1";
576 httpmethodout
= httpmethod
;
577 httpurlout
= geturls_text
.geturl();
578 httpheaderout
= geturls_text
.headers
;
579 httpheadersentout
= httpheaders
;
580 elif(httplibuse
=="request3"):
582 if(httpmethod
=="GET"):
583 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
584 elif(httpmethod
=="POST"):
585 geturls_text
= geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
587 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
588 except urllib3
.exceptions
.ConnectTimeoutError
:
589 log
.info("Error With URL "+httpurl
);
591 except urllib3
.exceptions
.ConnectError
:
592 log
.info("Error With URL "+httpurl
);
594 except urllib3
.exceptions
.MaxRetryError
:
595 log
.info("Error With URL "+httpurl
);
597 except socket
.timeout
:
598 log
.info("Error With URL "+httpurl
);
600 httpcodeout
= geturls_text
.status
;
601 httpversionout
= "1.1";
602 httpmethodout
= httpmethod
;
603 httpurlout
= geturls_text
.geturl();
604 httpheaderout
= geturls_text
.info();
605 httpheadersentout
= httpheaders
;
606 elif(httplibuse
=="httplib"):
607 if(urlparts
[0]=="http"):
608 httpconn
= HTTPConnection(urlparts
[1]);
609 elif(urlparts
[0]=="https"):
610 httpconn
= HTTPSConnection(urlparts
[1]);
613 if(postdata
is not None and not isinstance(postdata
, dict)):
614 postdata
= urlencode(postdata
);
616 if(httpmethod
=="GET"):
617 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
618 elif(httpmethod
=="POST"):
619 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
621 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
622 except socket
.timeout
:
623 log
.info("Error With URL "+httpurl
);
625 except socket
.gaierror
:
626 log
.info("Error With URL "+httpurl
);
628 geturls_text
= httpconn
.getresponse();
629 httpcodeout
= geturls_text
.status
;
630 httpversionout
= "1.1";
631 httpmethodout
= httpmethod
;
632 httpurlout
= httpurl
;
633 httpheaderout
= geturls_text
.getheaders();
634 httpheadersentout
= httpheaders
;
635 elif(httplibuse
=="httplib2"):
636 if(urlparts
[0]=="http"):
637 httpconn
= HTTPConnectionWithTimeout(urlparts
[1]);
638 elif(urlparts
[0]=="https"):
639 httpconn
= HTTPSConnectionWithTimeout(urlparts
[1]);
642 if(postdata
is not None and not isinstance(postdata
, dict)):
643 postdata
= urlencode(postdata
);
645 if(httpmethod
=="GET"):
646 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
647 elif(httpmethod
=="POST"):
648 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
650 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
651 except socket
.timeout
:
652 log
.info("Error With URL "+httpurl
);
654 except socket
.gaierror
:
655 log
.info("Error With URL "+httpurl
);
657 geturls_text
= httpconn
.getresponse();
658 httpcodeout
= geturls_text
.status
;
659 httpversionout
= "1.1";
660 httpmethodout
= httpmethod
;
661 httpurlout
= httpurl
;
662 httpheaderout
= geturls_text
.getheaders();
663 httpheadersentout
= httpheaders
;
664 elif(httplibuse
=="urllib3"):
666 if(httpmethod
=="GET"):
667 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
668 elif(httpmethod
=="POST"):
669 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
671 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
672 except urllib3
.exceptions
.ConnectTimeoutError
:
673 log
.info("Error With URL "+httpurl
);
675 except urllib3
.exceptions
.ConnectError
:
676 log
.info("Error With URL "+httpurl
);
678 except urllib3
.exceptions
.MaxRetryError
:
679 log
.info("Error With URL "+httpurl
);
681 except socket
.timeout
:
682 log
.info("Error With URL "+httpurl
);
684 httpcodeout
= geturls_text
.status
;
685 httpversionout
= "1.1";
686 httpmethodout
= httpmethod
;
687 httpurlout
= geturls_text
.geturl();
688 httpheaderout
= geturls_text
.info();
689 httpheadersentout
= httpheaders
;
690 elif(httplibuse
=="requests"):
692 if(httpmethod
=="GET"):
693 geturls_text
= requests
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
694 elif(httpmethod
=="POST"):
695 geturls_text
= requests
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
697 geturls_text
= requests
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
698 except requests
.exceptions
.ConnectTimeout
:
699 log
.info("Error With URL "+httpurl
);
701 except requests
.exceptions
.ConnectError
:
702 log
.info("Error With URL "+httpurl
);
704 except socket
.timeout
:
705 log
.info("Error With URL "+httpurl
);
707 httpcodeout
= geturls_text
.status_code
;
708 httpversionout
= "1.1";
709 httpmethodout
= httpmethod
;
710 httpurlout
= geturls_text
.url
;
711 httpheaderout
= geturls_text
.headers
;
712 httpheadersentout
= httpheaders
;
713 elif(httplibuse
=="httpx"):
715 if(httpmethod
=="GET"):
716 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
717 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
718 elif(httpmethod
=="POST"):
719 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
720 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
722 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
723 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
724 except httpx
.ConnectTimeout
:
725 log
.info("Error With URL "+httpurl
);
727 except httpx
.ConnectError
:
728 log
.info("Error With URL "+httpurl
);
730 except socket
.timeout
:
731 log
.info("Error With URL "+httpurl
);
733 httpcodeout
= geturls_text
.status_code
;
734 httpversionout
= geturls_text
.http_version
;
735 httpmethodout
= httpmethod
;
736 httpurlout
= str(geturls_text
.url
);
737 httpheaderout
= geturls_text
.headers
;
738 httpheadersentout
= httpheaders
;
739 elif(httplibuse
=="httpx2"):
741 if(httpmethod
=="GET"):
742 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
743 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
744 elif(httpmethod
=="POST"):
745 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
746 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
748 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
749 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
750 except httpx
.ConnectTimeout
:
751 log
.info("Error With URL "+httpurl
);
753 except httpx
.ConnectError
:
754 log
.info("Error With URL "+httpurl
);
756 except socket
.timeout
:
757 log
.info("Error With URL "+httpurl
);
759 httpcodeout
= geturls_text
.status_code
;
760 httpversionout
= geturls_text
.http_version
;
761 httpmethodout
= httpmethod
;
762 httpurlout
= str(geturls_text
.url
);
763 httpheaderout
= geturls_text
.headers
;
764 httpheadersentout
= httpheaders
;
765 elif(httplibuse
=="httpcore"):
767 if(httpmethod
=="GET"):
768 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
769 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
770 elif(httpmethod
=="POST"):
771 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
772 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
774 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
775 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
776 except httpcore
.ConnectTimeout
:
777 log
.info("Error With URL "+httpurl
);
779 except httpcore
.ConnectError
:
780 log
.info("Error With URL "+httpurl
);
782 except socket
.timeout
:
783 log
.info("Error With URL "+httpurl
);
785 httpcodeout
= geturls_text
.status
;
786 httpversionout
= "1.1";
787 httpmethodout
= httpmethod
;
788 httpurlout
= str(httpurl
);
789 httpheaderout
= geturls_text
.headers
;
790 httpheadersentout
= httpheaders
;
791 elif(httplibuse
=="httpcore2"):
793 if(httpmethod
=="GET"):
794 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
795 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
796 elif(httpmethod
=="POST"):
797 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
798 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
800 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
801 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
802 except httpcore
.ConnectTimeout
:
803 log
.info("Error With URL "+httpurl
);
805 except httpcore
.ConnectError
:
806 log
.info("Error With URL "+httpurl
);
808 except socket
.timeout
:
809 log
.info("Error With URL "+httpurl
);
811 httpcodeout
= geturls_text
.status
;
812 httpversionout
= "1.1";
813 httpmethodout
= httpmethod
;
814 httpurlout
= str(httpurl
);
815 httpheaderout
= geturls_text
.headers
;
816 httpheadersentout
= httpheaders
;
817 elif(httplibuse
=="mechanize"):
819 if(httpmethod
=="GET"):
820 geturls_text
= geturls_opener
.open(httpurl
);
821 elif(httpmethod
=="POST"):
822 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
824 geturls_text
= geturls_opener
.open(httpurl
);
825 except mechanize
.HTTPError
as geturls_text_error
:
826 geturls_text
= geturls_text_error
;
827 log
.info("Error With URL "+httpurl
);
829 log
.info("Error With URL "+httpurl
);
831 except socket
.timeout
:
832 log
.info("Error With URL "+httpurl
);
834 httpcodeout
= geturls_text
.code
;
835 httpversionout
= "1.1";
836 httpmethodout
= httpmethod
;
837 httpurlout
= geturls_text
.geturl();
838 httpheaderout
= geturls_text
.info();
839 httpheadersentout
= httpheaders
;
840 elif(httplibuse
=="ftp"):
841 geturls_text
= download_file_from_ftp_file(httpurl
);
842 if(not geturls_text
):
844 log
.info("Downloading URL "+httpurl
);
845 returnval_content
= geturls_text
.read()[:];
846 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
847 geturls_text
.close();
848 elif(httplibuse
=="sftp"):
849 geturls_text
= download_file_from_sftp_file(httpurl
);
850 if(not geturls_text
):
852 log
.info("Downloading URL "+httpurl
);
853 returnval_content
= geturls_text
.read()[:];
854 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
855 geturls_text
.close();
857 elif(httplibuse
=="pysftp"):
858 geturls_text
= download_file_from_pysftp_file(httpurl
);
859 if(not geturls_text
):
861 log
.info("Downloading URL "+httpurl
);
862 returnval_content
= geturls_text
.read()[:];
863 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
864 geturls_text
.close();
868 if(isinstance(httpheaderout
, list)):
869 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
870 if(sys
.version
[0]=="2"):
872 prehttpheaderout
= httpheaderout
;
873 httpheaderkeys
= httpheaderout
.keys();
874 imax
= len(httpheaderkeys
);
878 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
880 except AttributeError:
882 httpheaderout
= fix_header_names(httpheaderout
);
883 if(isinstance(httpheadersentout
, list)):
884 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
885 httpheadersentout
= fix_header_names(httpheadersentout
);
886 log
.info("Downloading URL "+httpurl
);
887 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="request3" or httplibuse
=="httplib" or httplibuse
=="httplib2" or httplibuse
=="urllib3" or httplibuse
=="mechanize" or httplibuse
=="httpx" or httplibuse
=="httpx2" or httplibuse
=="httpcore" or httplibuse
=="httpcore2"):
888 if(httpheaderout
.get("Content-Encoding")=="gzip" or httpheaderout
.get("Content-Encoding")=="deflate"):
889 if(sys
.version
[0]=="2"):
890 strbuf
= StringIO(geturls_text
.read());
891 if(sys
.version
[0]>="3"):
892 strbuf
= BytesIO(geturls_text
.read());
893 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
894 returnval_content
= gzstrbuf
.read()[:];
895 if(httpheaderout
.get("Content-Encoding")!="gzip" and httpheaderout
.get("Content-Encoding")!="deflate" and httpheaderout
.get("Content-Encoding")!="br"):
896 returnval_content
= geturls_text
.read()[:];
897 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
898 returnval_content
= geturls_text
.read()[:];
899 returnval_content
= brotli
.decompress(returnval_content
);
900 geturls_text
.close();
901 elif(httplibuse
=="requests"):
902 log
.info("Downloading URL "+httpurl
);
903 if(httpheaderout
.get("Content-Encoding")=="gzip" or httpheaderout
.get("Content-Encoding")=="deflate"):
904 if(sys
.version
[0]=="2"):
905 strbuf
= StringIO(gzstrbuf
.raw
.read());
906 if(sys
.version
[0]>="3"):
907 strbuf
= BytesIO(gzstrbuf
.raw
.read());
908 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
909 returnval_content
= gzstrbuf
.read()[:];
910 if(httpheaderout
.get("Content-Encoding")!="gzip" and httpheaderout
.get("Content-Encoding")!="deflate" and httpheaderout
.get("Content-Encoding")!="br"):
911 returnval_content
= gzstrbuf
.raw
.read()[:];
912 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
913 returnval_content
= gzstrbuf
.raw
.read()[:];
914 returnval_content
= brotli
.decompress(returnval_content
);
915 geturls_text
.close();
916 elif(httplibuse
=="ftp" or httplibuse
=="sftp" or httplibuse
=="pysftp"):
920 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
};
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1):
    """
    Download httpurl into a uniquely-named temporary file and return a dict
    describing the result: {'Type': "File", 'Filename', 'Filesize',
    'FilesizeAlt', 'Headers', 'Version', 'Method', 'HeadersSent', 'URL',
    'Code', 'DownloadTime', 'DownloadTimeReadable'}.  Returns False when the
    URL cannot be fetched.

    httplibuse selects the transport backend: "urllib", "request",
    "request3", "httplib", "httplib2", "urllib3", "requests", "httpx",
    "httpx2", "httpcore", "httpcore2", "mechanize", "ftp", "sftp", "pysftp".
    Backends whose library is not installed silently fall back to
    urllib/httplib/ftp equivalents.

    NOTE(review): reconstructed from a line-mangled source in which some
    structural lines (try:/else:/return False) were elided; the restored
    lines follow the exactly-parallel pattern of the sibling backend
    branches in this file -- confirm against the original.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix, haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    exec_time_start = time.time();
    # Unique temp-file suffix: sha1 of URL + buffer size + start time.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    # Normalize backend aliases and fall back when an optional library is absent.
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    # NOTE(review): source tested haveparamiko here too; havepysftp is the
    # matching flag for the pysftp backend -- confirm intended behavior.
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # BUGFIX: was httpuseragent.update({...}) -- mutated the wrong
            # object (a string) and never actually added the header.
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            # BUGFIX: was httpuseragent.update({...}) here as well.
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        # URL-embedded credentials become an HTTP Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    # (duplicated `or httplibuse=="mechanize"` test removed)
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="mechanize"):
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    # Defaults so the ftp/sftp/pysftp paths (which set no HTTP metadata)
    # cannot hit a NameError in the shared epilogue below.
    downloadsize = None;
    httpcodeout = None;
    httpversionout = None;
    httpmethodout = None;
    httpurlout = httpurl;
    httpheaderout = {};
    httpheadersentout = httpheaders;
    if(httplibuse=="urllib"):
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except HTTPError as geturls_text_error:
            # An HTTP error response is still a response; keep it and continue.
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        # (duplicate dead `except socket.timeout:` clause removed)
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getcode();
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="request"):
        try:
            if(httpmethod=="GET"):
                geturls_request = Request(httpurl, headers=httpheaders);
                geturls_text = urlopen(geturls_request);
            elif(httpmethod=="POST"):
                geturls_request = Request(httpurl, headers=httpheaders);
                geturls_text = urlopen(geturls_request, data=postdata);
            else:
                geturls_request = Request(httpurl, headers=httpheaders);
                geturls_text = urlopen(geturls_request);
        except HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getcode();
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="request3"):
        try:
            # BUGFIX: removed the `geturls_text = geturls_text = ...`
            # double assignment present on all three calls.
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.NewConnectionError:
            # BUGFIX: urllib3.exceptions has no ConnectError; the old name
            # raised AttributeError while handling a real failure.
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="httplib"):
        if(urlparts[0]=="http"):
            httpconn = HTTPConnection(urlparts[1]);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnection(urlparts[1]);
        else:
            return False;
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                # BUGFIX: was request("GET", ...) -- POSTs were sent as GET.
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = httpurl;
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
    elif(httplibuse=="httplib2"):
        # BUGFIX: the original issued httpconn.request(...) without ever
        # creating httpconn in this branch (NameError unless the httplib
        # branch had run first); build the httplib2 timeout-capable
        # connection the same way the httplib branch does.
        if(urlparts[0]=="http"):
            httpconn = HTTPConnectionWithTimeout(urlparts[1]);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
        else:
            return False;
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                # BUGFIX: was request("GET", ...).
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = httpurl;
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
    elif(httplibuse=="urllib3"):
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                # BUGFIX: was urlopen("GET", ...) -- POSTs were sent as GET.
                geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.NewConnectionError:
            # BUGFIX: urllib3.exceptions.ConnectError does not exist.
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="requests"):
        try:
            # stream=True so the body can be copied to disk in chunks below.
            if(httpmethod=="GET"):
                geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
            elif(httpmethod=="POST"):
                geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
            else:
                geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
        except requests.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except requests.exceptions.ConnectionError:
            # BUGFIX: requests.exceptions has ConnectionError, not ConnectError.
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpx"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpx2"):
        try:
            # Same as "httpx" but with HTTP/2 negotiation enabled.
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpcore"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                # BUGFIX: was request("GET", ..., data=postdata); httpcore
                # takes the body via content= and the method must be "POST".
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpcore2"):
        try:
            # Same as "httpcore" but with HTTP/2 enabled.
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                # BUGFIX: method "POST" and content= (see "httpcore" branch).
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="mechanize"):
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="ftp"):
        geturls_text = download_file_from_ftp_file(httpurl);
        if(not geturls_text):
            return False;
        # File-like object: measure its size by seeking to the end.
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    elif(httplibuse=="sftp"):
        geturls_text = download_file_from_sftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    elif(httplibuse=="pysftp"):
        geturls_text = download_file_from_pysftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    else:
        return False;
    # --- Shared epilogue: normalize headers and sizes ---
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        try:
            # Python 2 header objects are not plain dicts; rebuild key-by-key.
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="requests" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2"):
        downloadsize = httpheaderout.get('Content-Length');
    # Hoisted once for every backend (the original initialized these only in
    # some branches, risking NameError on others).
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if(downloadsize is None):
        downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2" or httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            # Propagate the server's Last-Modified timestamp onto the temp
            # file; TypeError added to the guards so a missing/None header
            # no longer crashes the download.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except (AttributeError, TypeError):
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except (ValueError, TypeError):
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.close();
    elif(httplibuse=="requests"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except (AttributeError, TypeError):
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except (ValueError, TypeError):
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout};
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.close();
    else:
        return False;
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (the original computed a negative
    # duration for the log message and the 'DownloadTime' fields).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
1399 def download_from_url_to_file(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
1400 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcorei
, haveparamiko
, havepysftp
;
1402 sleep
= geturls_download_sleep
;
1403 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
1404 httplibuse
= "urllib";
1405 if(httplibuse
=="httplib1"):
1406 httplibuse
= "httplib";
1407 if(not haverequests
and httplibuse
=="requests"):
1408 httplibuse
= "urllib";
1409 if(not havehttpx
and httplibuse
=="httpx"):
1410 httplibuse
= "urllib";
1411 if(not havehttpx
and httplibuse
=="httpx2"):
1412 httplibuse
= "urllib";
1413 if(not havehttpcore
and httplibuse
=="httpcore"):
1414 httplibuse
= "urllib";
1415 if(not havehttpcore
and httplibuse
=="httpcore2"):
1416 httplibuse
= "urllib";
1417 if(not havemechanize
and httplibuse
=="mechanize"):
1418 httplibuse
= "urllib";
1419 if(not havehttplib2
and httplibuse
=="httplib2"):
1420 httplibuse
= "httplib";
1421 if(not haveparamiko
and httplibuse
=="sftp"):
1423 if(not havepysftp
and httplibuse
=="pysftp"):
1425 if(not outfile
=="-"):
1426 outpath
= outpath
.rstrip(os
.path
.sep
);
1427 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
1428 if(not os
.path
.exists(outpath
)):
1429 os
.makedirs(outpath
);
1430 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
1432 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
1434 pretmpfilename
= download_from_url_file(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, httplibuse
, buffersize
[0], sleep
);
1435 if(not pretmpfilename
):
1437 tmpfilename
= pretmpfilename
['Filename'];
1438 downloadsize
= os
.path
.getsize(tmpfilename
);
1440 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
1441 exec_time_start
= time
.time();
1442 shutil
.move(tmpfilename
, filepath
);
1444 os
.utime(filepath
, (time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple()), time
.mktime(email
.utils
.parsedate_to_datetime(pretmpfilename
.get('Headers').get('Last-Modified')).timetuple())));
1445 except AttributeError:
1447 os
.utime(filepath
, (time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time
.mktime(datetime
.datetime
.strptime(pretmpfilename
.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
1452 exec_time_end
= time
.time();
1453 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
1454 if(os
.path
.exists(tmpfilename
)):
1455 os
.remove(tmpfilename
);
1456 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': httpmethod
, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
1457 if(outfile
=="-" and sys
.version
[0]=="2"):
1458 pretmpfilename
= download_from_url_file(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, httplibuse
, buffersize
[0], sleep
);
1459 if(not pretmpfilename
):
1461 tmpfilename
= pretmpfilename
['Filename'];
1462 downloadsize
= os
.path
.getsize(tmpfilename
);
1465 exec_time_start
= time
.time();
1466 with
open(tmpfilename
, 'rb') as ft
:
1469 databytes
= ft
.read(buffersize
[1]);
1470 if not databytes
: break;
1471 datasize
= len(databytes
);
1472 fulldatasize
= datasize
+ fulldatasize
;
1475 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1476 downloaddiff
= fulldatasize
- prevdownsize
;
1477 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1478 prevdownsize
= fulldatasize
;
1481 fdata
= f
.getvalue();
1484 os
.remove(tmpfilename
);
1485 exec_time_end
= time
.time();
1486 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1487 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': httpmethod
, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
1488 if(outfile
=="-" and sys
.version
[0]>="3"):
1489 pretmpfilename
= download_from_url_file(httpurl
, httpheaders
, httpuseragent
, httpreferer
, httpcookie
, httpmethod
, postdata
, httplibuse
, buffersize
[0], sleep
);
1490 tmpfilename
= pretmpfilename
['Filename'];
1491 downloadsize
= os
.path
.getsize(tmpfilename
);
1494 exec_time_start
= time
.time();
1495 with
open(tmpfilename
, 'rb') as ft
:
1498 databytes
= ft
.read(buffersize
[1]);
1499 if not databytes
: break;
1500 datasize
= len(databytes
);
1501 fulldatasize
= datasize
+ fulldatasize
;
1504 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1505 downloaddiff
= fulldatasize
- prevdownsize
;
1506 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1507 prevdownsize
= fulldatasize
;
1510 fdata
= f
.getvalue();
1513 os
.remove(tmpfilename
);
1514 exec_time_end
= time
.time();
1515 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1516 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'Version': pretmpfilename
['Version'], 'Method': pretmpfilename
['Method'], 'Method': httpmethod
, 'HeadersSent': pretmpfilename
['HeadersSent'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
def download_file_from_ftp_file(url):
    """Download the file named by an ftp:// or ftps:// URL.

    Returns a BytesIO rewound to offset 0 holding the file contents, or
    False when the scheme is unsupported or the connection fails.
    """
    # NOTE(review): several interior lines (else-branches, try:, returns) were
    # missing from this extraction and were restored from the sibling
    # sftp/pysftp helpers' pattern - confirm against upstream.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);  # parsed for parity with siblings (unused)
    file_dir = os.path.dirname(urlparts.path);    # parsed for parity with siblings (unused)
    # Fall back to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;  # redundant guard kept for parity (else above already rejects these)
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    # BUGFIX: log in with the anonymous-defaulted credentials computed above;
    # the original passed urlparts.username/password, leaving them unused.
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();  # switch the FTPS data channel to TLS
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);  # rewind so callers read from the start
    return ftpfile;
def download_file_from_ftp_string(url):
    """Fetch *url* over FTP/FTPS and return its payload as bytes."""
    return download_file_from_ftp_file(url).read();
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the readable file object *ftpfile* to an ftp:// or ftps:// URL.

    Returns *ftpfile* rewound to offset 0 on success, False on an
    unsupported scheme or connection failure.
    """
    # NOTE(review): interior lines missing from this extraction were restored
    # from the sibling helpers' pattern - confirm against upstream.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);  # parsed for parity with siblings (unused)
    file_dir = os.path.dirname(urlparts.path);    # parsed for parity with siblings (unused)
    # Fall back to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;  # redundant guard kept for parity (else above already rejects these)
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;  # default FTP control port
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    # BUGFIX: log in with the anonymous-defaulted credentials computed above;
    # the original passed urlparts.username/password, leaving them unused.
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();  # switch the FTPS data channel to TLS
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);  # rewind so the caller can reuse the buffer
    return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the bytes *ftpstring* to *url* via FTP/FTPS.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_ftp_file;
    returns that helper's result.
    """
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    ftpfileo.close();
    # BUGFIX: the result was computed but never returned.
    return ftpfile;
def download_file_from_sftp_file(url):
    """Download the file named by an sftp:// URL using paramiko.

    Returns a BytesIO rewound to offset 0 holding the file contents, or
    False when the scheme is not sftp or the connection fails.
    """
    # NOTE(review): interior lines missing from this extraction were restored
    # from the sibling helpers' pattern - confirm against upstream.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);  # parsed for parity with siblings (unused)
    file_dir = os.path.dirname(urlparts.path);    # parsed for parity with siblings (unused)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port
    else:
        sftp_port = urlparts.port;
    # Fall back to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        # BUGFIX: use the anonymous-defaulted credentials computed above; the
        # original passed urlparts.username/password, leaving them unused.
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    sftp = ssh.open_sftp();
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);  # rewind so callers read from the start
    return sftpfile;
def download_file_from_sftp_file(url):
    """Fallback stub used when paramiko is unavailable; always returns False."""
    # NOTE(review): body was missing from this view; restored to match the
    # no-dependency fallback-stub pattern - confirm against upstream.
    return False;
def download_file_from_sftp_string(url):
    """Fetch *url* over SFTP and return its payload as bytes."""
    return download_file_from_sftp_file(url).read();
def download_file_from_ftp_string(url):
    """Fallback stub; always returns False."""
    # NOTE(review): the name looks like a copy-paste slip - the surrounding
    # no-paramiko fallback section suggests this should be
    # download_file_from_sftp_string; as written it clobbers the working FTP
    # helper defined earlier. Name kept to avoid an unverified interface
    # change - confirm against upstream. Body was missing from this view and
    # restored to match the fallback-stub pattern.
    return False;
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the readable file object *sftpfile* to an sftp:// URL via paramiko.

    Returns *sftpfile* rewound to offset 0 on success, False on an
    unsupported scheme or connection failure.
    """
    # NOTE(review): interior lines missing from this extraction were restored
    # from the sibling helpers' pattern - confirm against upstream.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);  # parsed for parity with siblings (unused)
    file_dir = os.path.dirname(urlparts.path);    # parsed for parity with siblings (unused)
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port
    else:
        sftp_port = urlparts.port;
    # Fall back to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        # BUGFIX: use the anonymous-defaulted credentials computed above; the
        # original passed urlparts.username/password, leaving them unused.
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);  # rewind so the caller can reuse the buffer
    return sftpfile;
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback stub used when paramiko is unavailable; always returns False."""
    # NOTE(review): body was missing from this view; restored to match the
    # no-dependency fallback-stub pattern - confirm against upstream.
    return False;
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the bytes *sftpstring* to *url* via SFTP.

    Wraps the bytes in a BytesIO and delegates to upload_file_to_sftp_file;
    returns that helper's result.
    """
    sftpfileo = BytesIO(sftpstring);
    # BUGFIX: called nonexistent 'upload_file_to_sftp_files' with the
    # undefined name 'ftpfileo'; both were NameErrors at runtime.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    # BUGFIX: the result was computed but never returned.
    return sftpfile;
def upload_file_to_sftp_string(url):
    """Fallback stub; always returns False."""
    # NOTE(review): body was missing from this view; restored to match the
    # fallback-stub pattern. Signature ((url) only) differs from the real
    # implementation's (sftpstring, url) - looks like an upstream
    # inconsistency; kept as-is to avoid an unverified interface change.
    return False;
def download_file_from_pysftp_file(url):
    """Download the file named by an sftp:// URL using pysftp.

    Returns a BytesIO rewound to offset 0 holding the file contents, or
    False when the scheme is not sftp or the connection fails.
    """
    # NOTE(review): interior lines missing from this extraction were restored
    # from the sibling helpers' pattern - confirm against upstream.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);  # parsed for parity with siblings (unused)
    file_dir = os.path.dirname(urlparts.path);    # parsed for parity with siblings (unused)
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port
    else:
        sftp_port = urlparts.port;
    # Fall back to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the original discarded the pysftp.Connection return value and
        # later called ssh.open_sftp() on the undefined name 'ssh'. Capture the
        # connection and use it directly; also use the anonymous-defaulted
        # credentials computed above.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);  # rewind so callers read from the start
    return sftpfile;
def download_file_from_pysftp_file(url):
    """Fallback stub used when pysftp is unavailable; always returns False."""
    # NOTE(review): body was missing from this view; restored to match the
    # no-dependency fallback-stub pattern - confirm against upstream.
    return False;
def download_file_from_pysftp_string(url):
    """Fetch *url* over SFTP (pysftp backend) and return its payload as bytes."""
    return download_file_from_pysftp_file(url).read();
def download_file_from_ftp_string(url):
    """Fallback stub; always returns False."""
    # NOTE(review): the name looks like a copy-paste slip - the surrounding
    # no-pysftp fallback section suggests this should be
    # download_file_from_pysftp_string; as written it clobbers the working FTP
    # helper defined earlier. Name kept to avoid an unverified interface
    # change - confirm against upstream. Body was missing from this view and
    # restored to match the fallback-stub pattern.
    return False;
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the readable file object *sftpfile* to an sftp:// URL via pysftp.

    Returns *sftpfile* rewound to offset 0 on success, False on an
    unsupported scheme or connection failure.
    """
    # NOTE(review): interior lines missing from this extraction were restored
    # from the sibling helpers' pattern - confirm against upstream.
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);  # parsed for parity with siblings (unused)
    file_dir = os.path.dirname(urlparts.path);    # parsed for parity with siblings (unused)
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;  # default SSH port
    else:
        sftp_port = urlparts.port;
    # Fall back to anonymous credentials when the URL carries none.
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # BUGFIX: the original discarded the pysftp.Connection return value and
        # later called ssh.open_sftp() on the undefined name 'ssh'. Capture the
        # connection and use it directly; also use the anonymous-defaulted
        # credentials computed above.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);  # BUGFIX: was undefined name 'httpurl'
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);  # rewind so the caller can reuse the buffer
    return sftpfile;
def upload_file_to_pysftp_file(sftpfile, url):
    """Fallback stub used when pysftp is unavailable; always returns False."""
    # NOTE(review): body was missing from this view; restored to match the
    # no-dependency fallback-stub pattern - confirm against upstream.
    return False;
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the bytes *sftpstring* to *url* via SFTP (pysftp backend).

    Wraps the bytes in a BytesIO and delegates to upload_file_to_pysftp_file;
    returns that helper's result.
    """
    sftpfileo = BytesIO(sftpstring);
    # BUGFIX: called nonexistent 'upload_file_to_pysftp_files' with the
    # undefined name 'ftpfileo'; both were NameErrors at runtime.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    # BUGFIX: the result was computed but never returned.
    return sftpfile;
def upload_file_to_pysftp_string(url):
    """Fallback stub; always returns False."""
    # NOTE(review): body was missing from this view; restored to match the
    # fallback-stub pattern. Signature ((url) only) differs from the real
    # implementation's (sftpstring, url) - looks like an upstream
    # inconsistency; kept as-is to avoid an unverified interface change.
    return False;