4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/24/2023 Ver. 1.5.0 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
55 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
83 if(sys
.version
[0]=="2"):
85 from cStringIO
import StringIO
;
87 from StringIO
import StringIO
;
88 # From http://python-future.org/compatible_idioms.html
89 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
90 from urllib
import urlencode
;
91 from urllib
import urlopen
as urlopenalt
;
92 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
93 import urlparse
, cookielib
;
94 from httplib
import HTTPConnection
, HTTPSConnection
;
95 if(sys
.version
[0]>="3"):
96 from io
import StringIO
, BytesIO
;
97 # From http://python-future.org/compatible_idioms.html
98 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
99 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
100 from urllib
.error
import HTTPError
, URLError
;
101 import urllib
.parse
as urlparse
;
102 import http
.cookiejar
as cookielib
;
103 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Program identity / branding constants.
__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
# Version tuple: (major, minor, patch, pre-release tag or None, pre-release number or None).
__version_info__ = (1, 5, 0, "RC 1", 1);
# Release date tuple: (year, month, day, pre-release tag, pre-release number).
__version_date_info__ = (2023, 9, 24, "RC 1", 1);
# Date string rendered as "YYYY.MM.DD" with zero-padded month/day.
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
# Append the pre-release number to the date string when one is set.
# NOTE(review): the guard tests __version_info__[4] but appends
# __version_date_info__[4] — both tuples carry the same pre-release number,
# but confirm this cross-reference is intentional.
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__;
# Human-readable version string, with the pre-release tag appended when set.
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
124 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
126 pytempdir
= tempfile
.gettempdir();
128 PyBitness
= platform
.architecture();
129 if(PyBitness
=="32bit" or PyBitness
=="32"):
131 elif(PyBitness
=="64bit" or PyBitness
=="64"):
136 compression_supported
= "gzip, deflate";
138 compression_supported
= "gzip, deflate, br";
140 compression_supported
= "gzip, deflate";
# Cookie jar shared by the download helpers.
geturls_cj = cookielib.CookieJar();
# Windows platform tokens for User-Agent strings, plus the matching
# Client Hints (Sec-CH-UA-*) header values for each platform.
# BUGFIX: each *_ua_addon dict previously listed 'SEC-CH-UA-PLATFORM' twice,
# so the OS version string silently overwrote the platform name "Windows";
# the second occurrence is now the intended 'SEC-CH-UA-PLATFORM-VERSION' key.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): "5.1.0" looks like a copy-paste from the 32-bit XP entry
# (NT 5.2 would be "5.2.0") — confirm before changing the value.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Full browser User-Agent strings, all built on the Windows 7 platform token.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# This tool's own (honest) User-Agent string.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Name of the running Python implementation (e.g. "CPython"), with a fallback
# label when platform.python_implementation() reports an empty string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
# Alternate self-identifying User-Agent including OS, arch and Python version.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot impersonation strings (current and legacy forms).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used when the caller does not pick one.
geturls_ua = geturls_ua_firefox_windows7;
# Ready-made request-header dicts for each impersonated browser.  Each dict
# carries the matching User-Agent plus standard Accept* headers; Chromium-based
# browsers additionally get Client Hints (SEC-CH-UA*) headers, completed by
# merging in the windows7_ua_addon platform hints below.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# (Previously missing the terminating semicolon used everywhere else.)
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# BUGFIX: the next two dicts previously listed 'SEC-CH-UA-PLATFORM' twice, so
# the version string silently overwrote the platform name; the second
# occurrence is now the intended 'SEC-CH-UA-PLATFORM-VERSION' key.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Defaults used by the download helpers when the caller does not pick a set.
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit a debug/diagnostic message.

    Parameters:
      dbgtxt    -- the message text.
      outtype   -- "print", "log", "warning", "error", "critical",
                   "exception", "logalt" or "debug".
      dbgenable -- when False, nothing is emitted (still returns True).
      dgblevel  -- numeric level used only by the "logalt" type.

    Returns True when the message was handled (or output is disabled),
    False for an unrecognized outtype."""
    if(not dbgenable):
        # Output disabled: swallow the message but report success.
        return True;
    if(outtype=="print"):
        print(dbgtxt);
        return True;
    # Dispatch table replaces the original long if/elif chain.
    logfuncs = {
        "log": logging.info,
        "warning": logging.warning,
        "error": logging.error,
        "critical": logging.critical,
        "exception": logging.exception,
        "debug": logging.debug,
    };
    if(outtype in logfuncs):
        logfuncs[outtype](dbgtxt);
        return True;
    if(outtype=="logalt"):
        logging.log(dgblevel, dbgtxt);
        return True;
    return False;
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit a message via verbose_printout() and hand the text back to the
    caller, or False when the message type was not handled."""
    was_handled = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel);
    return dbgtxt if was_handled else False;
def add_url_param(url, **params):
    """Return *url* with the given keyword arguments merged into its query
    string (existing keys of the same name are overwritten).

    BUGFIX: uses urlparse.parse_qsl instead of the deprecated cgi.parse_qsl
    (the cgi module was removed in Python 3.13); urllib.parse / urlparse
    provide parse_qsl on both Python 2 and 3."""
    n = 3;  # index of the query component in urlsplit() results
    parts = list(urlparse.urlsplit(url));
    d = dict(urlparse.parse_qsl(parts[n]));  # use parse_qs for list values
    d.update(params);
    parts[n] = urlencode(d);
    return urlparse.urlunsplit(parts);
# Make this script's own directory and the current working directory
# discoverable via PATH (used by which_exec below).
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], os.path.dirname(os.path.realpath(__file__)), os.getcwd()]);
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path,
    or None when it is not found.

    BUGFIX: uses os.pathsep and os.path.join instead of the hard-coded ":"
    and "/", so the lookup also works on Windows."""
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    return None;
def listize(varlist):
    """Build 1-based forward and reverse lookup dicts for a sequence.

    Returns {1: reg, 2: rev, 'reg': reg, 'rev': rev} where reg maps
    1-based index -> item and rev maps item -> 1-based index.
    (enumerate replaces the original manual while/counter loop.)"""
    newlistreg = {};
    newlistrev = {};
    for ilx, item in enumerate(varlist, 1):
        newlistreg[ilx] = item;
        newlistrev[item] = ilx;
    return {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev};
def twolistize(varlist):
    """Build 1-based forward/reverse lookup dicts for a sequence of
    (name, description) pairs; both fields are stripped of whitespace.

    Returns {1: nametmp, 2: desctmp, 'name': nametmp, 'desc': desctmp}
    where each *tmp is itself a {1/2/'reg'/'rev'} lookup structure as in
    listize().  (enumerate replaces the original while/counter loop.)"""
    newlistnamereg = {};
    newlistnamerev = {};
    newlistdescreg = {};
    newlistdescrev = {};
    for ilx, item in enumerate(varlist, 1):
        name = item[0].strip();
        desc = item[1].strip();
        newlistnamereg[ilx] = name;
        newlistnamerev[name] = ilx;
        newlistdescreg[ilx] = desc;
        newlistdescrev[desc] = ilx;
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev};
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev};
    return {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp};
def arglistize(proexec, *varlist):
    """Build an argv-style list: *proexec* followed by both elements of each
    (flag, value) pair in *varlist*, skipping any element that is None.
    (A for loop replaces the original manual while/counter loop.)"""
    newarglist = [proexec];
    for argpair in varlist:
        if argpair[0] is not None:
            newarglist.append(argpair[0]);
        if argpair[1] is not None:
            newarglist.append(argpair[1]);
    return newarglist;
def fix_header_names(header_dict):
    """Return a copy of *header_dict* with every header name Title-Cased
    (e.g. "content-type" -> "Content-Type"); values are left untouched."""
    fixed_headers = {};
    for header_name, header_value in header_dict.items():
        fixed_headers[header_name.title()] = header_value;
    return fixed_headers;
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a non-negative elapsed time in seconds as "H:MM:SS.ss"."""
    minutes_total, seconds = divmod(sec_elapsed, 60.0);
    hours = int(minutes_total // 60);
    minutes = int(minutes_total % 60);
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds);
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def _readable_size_dict(value, suffix, strformat, orgbytes):
    # Format value+suffix, strip trailing fractional zeros, and build the result dict.
    pre_return_val = (strformat % (value, suffix));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};

def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size.

    Parameters:
      bytes     -- the size to convert (parameter name kept for backward
                   compatibility although it shadows the builtin).
      precision -- decimal places in the formatted value.
      unit      -- "IEC" (1024-based: KiB/MiB/...) or "SI" (1000-based:
                   kB/MB/...); anything else falls back to "IEC".

    Returns a dict with keys 'Bytes' (original number), 'ReadableWithSuffix',
    'ReadableWithoutSuffix' and 'ReadableSuffix'.

    BUGFIX: the oversized fallback previously formatted the suffix as "YiB"
    with no separating space, so splitting the result raised IndexError; the
    suffix now carries the leading space like every other entry."""
    unit = unit.upper();
    if(unit!="IEC" and unit!="SI"):
        unit = "IEC";
    if(unit=="IEC"):
        suffixes = [" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB"];
        unitsize = 1024.0;
    else:
        suffixes = [" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB"];
        unitsize = 1000.0;
    orgbytes = bytes;
    for suffix in suffixes:
        if abs(bytes) < unitsize:
            return _readable_size_dict(bytes, suffix, "%3."+str(precision)+"f%s", orgbytes);
        bytes = bytes / unitsize;
    # Larger than the table covers: report in the largest supported unit.
    return _readable_size_dict(bytes, " YiB", "%."+str(precision)+"f%s", orgbytes);
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for the file at *infile*, optionally
    adding hex digests of the file contents.

    Parameters:
      usehashes    -- when True, compute a digest per entry in usehashtypes.
      usehashtypes -- comma-separated hashlib algorithm names; each digest is
                      stored under the upper-cased name (e.g. 'MD5').

    BUGFIX: the file handle is now closed deterministically via a context
    manager; the manual while/counter loop is replaced with a for loop."""
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes):
        # Read the contents once and feed them to every requested hash.
        with open(infile, "rb") as openfile:
            filecontents = openfile.read();
        for hashtypelistlow in usehashtypes.split(","):
            hashtypelistup = hashtypelistlow.strip().upper();
            filehash = hashlib.new(hashtypelistup);
            filehash.update(filecontents);
            return_val.update({hashtypelistup: filehash.hexdigest()});
    return return_val;
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() info for len(instring), optionally adding
    hex digests of the string contents.

    Parameters:
      usehashes    -- when True, compute a digest per entry in usehashtypes.
      usehashtypes -- comma-separated hashlib algorithm names; each digest is
                      stored under the upper-cased name (e.g. 'SHA1').

    (The manual while/counter loop is replaced with a for loop.)"""
    usehashtypes = usehashtypes.lower();
    return_val = get_readable_size(len(instring), precision, unit);
    if(usehashes):
        for hashtypelistlow in usehashtypes.split(","):
            hashtypelistup = hashtypelistlow.strip().upper();
            filehash = hashlib.new(hashtypelistup);
            if(sys.version[0]=="2"):
                filehash.update(instring);
            else:
                # Python 3 strings must be encoded to bytes before hashing.
                filehash.update(instring.encode('utf-8'));
            return_val.update({hashtypelistup: filehash.hexdigest()});
    return return_val;
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of (name, value) tuples.

    A list argument is returned unchanged; any other type yields False.
    The mutable default is only ever read here, never mutated."""
    if isinstance(headers, dict):
        # dict.items() works on Python 2 and 3, so no version sniffing needed.
        returnval = [(headkey, headvalue) for headkey, headvalue in headers.items()];
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to pycurl-style "Name: value" strings.

    A list argument is returned unchanged; any other type yields False.
    The mutable default is only ever read here, never mutated."""
    if isinstance(headers, dict):
        # dict.items() works on Python 2 and 3, so no version sniffing needed.
        returnval = [headkey+": "+headvalue for headkey, headvalue in headers.items()];
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) tuples to a header dict.

    A dict argument is returned unchanged; any other type yields False.
    Later duplicates win, matching the original index-by-index update loop."""
    if isinstance(headers, list):
        returnval = {header[0]: header[1] for header in headers};
    elif isinstance(headers, dict):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def get_httplib_support(checkvalue=None):
    """Report the supported download backends.

    With checkvalue=None, returns the list of available backend names.
    Otherwise returns True/False for whether that backend (legacy aliases
    "urllib1"/"urllib2"/"httplib1" accepted) is available.  Backends that
    depend on optional third-party packages are included only when the
    corresponding have* flag was set at import time."""
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    returnval = [];
    returnval.append("ftp");
    returnval.append("httplib");
    if(havehttplib2):
        returnval.append("httplib2");
    returnval.append("urllib");
    if(haveurllib3):
        returnval.append("urllib3");
        returnval.append("request3");
    returnval.append("request");
    if(haverequests):
        returnval.append("requests");
    if(havehttpx):
        returnval.append("httpx");
        returnval.append("httpx2");
    if(havemechanize):
        returnval.append("mechanize");
    if(haveparamiko):
        returnval.append("sftp");
    if(havepysftp):
        returnval.append("pysftp");
    # ("not checkvalue is None" rewritten to the idiomatic form.)
    if(checkvalue is not None):
        # Normalize legacy aliases before the membership test.
        if(checkvalue=="urllib1" or checkvalue=="urllib2"):
            checkvalue = "urllib";
        if(checkvalue=="httplib1"):
            checkvalue = "httplib";
        returnval = (checkvalue in returnval);
    return returnval;
def check_httplib_support(checkvalue="urllib"):
    """Check whether a single backend name is supported; the legacy aliases
    "urllib1"/"urllib2" and "httplib1" are accepted."""
    aliasmap = {"urllib1": "urllib", "urllib2": "urllib", "httplib1": "httplib"};
    normalized = aliasmap.get(checkvalue, checkvalue);
    returnval = get_httplib_support(normalized);
    return returnval;
def get_httplib_support_list():
    """Return the full list of supported download backend names."""
    return get_httplib_support(None);
480 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
481 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
483 sleep
= geturls_download_sleep
;
484 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
485 httplibuse
= "urllib";
486 if(httplibuse
=="httplib1"):
487 httplibuse
= "httplib";
488 if(not haverequests
and httplibuse
=="requests"):
489 httplibuse
= "urllib";
490 if(not havehttpx
and httplibuse
=="httpx"):
491 httplibuse
= "urllib";
492 if(not havehttpx
and httplibuse
=="httpx2"):
493 httplibuse
= "urllib";
494 if(not havehttpcore
and httplibuse
=="httpcore"):
495 httplibuse
= "urllib";
496 if(not havehttpcore
and httplibuse
=="httpcore2"):
497 httplibuse
= "urllib";
498 if(not havemechanize
and httplibuse
=="mechanize"):
499 httplibuse
= "urllib";
500 if(not havehttplib2
and httplibuse
=="httplib2"):
501 httplibuse
= "httplib";
502 if(not haveparamiko
and httplibuse
=="sftp"):
504 if(not havepysftp
and httplibuse
=="pysftp"):
506 urlparts
= urlparse
.urlparse(httpurl
);
507 if(isinstance(httpheaders
, list)):
508 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
509 httpheaders
= fix_header_names(httpheaders
);
510 if(httpuseragent
is not None):
511 if('User-Agent' in httpheaders
):
512 httpheaders
['User-Agent'] = httpuseragent
;
514 httpuseragent
.update({'User-Agent': httpuseragent
});
515 if(httpreferer
is not None):
516 if('Referer' in httpheaders
):
517 httpheaders
['Referer'] = httpreferer
;
519 httpuseragent
.update({'Referer': httpreferer
});
520 if(urlparts
.username
is not None or urlparts
.password
is not None):
521 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
522 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
523 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
524 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="mechanize"):
525 if(isinstance(httpheaders
, dict)):
526 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
527 geturls_opener
.addheaders
= httpheaders
;
529 if(postdata
is not None and not isinstance(postdata
, dict)):
530 postdata
= urlencode(postdata
);
531 if(httplibuse
=="urllib"):
533 if(httpmethod
=="GET"):
534 geturls_text
= geturls_opener
.open(httpurl
);
535 elif(httpmethod
=="POST"):
536 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
538 geturls_text
= geturls_opener
.open(httpurl
);
539 except HTTPError
as geturls_text_error
:
540 geturls_text
= geturls_text_error
;
541 log
.info("Error With URL "+httpurl
);
543 log
.info("Error With URL "+httpurl
);
545 except socket
.timeout
:
546 log
.info("Error With URL "+httpurl
);
548 httpcodeout
= geturls_text
.getcode();
549 httpversionout
= "1.1";
550 httpmethodout
= httpmethod
;
551 httpurlout
= geturls_text
.geturl();
552 httpheaderout
= geturls_text
.info();
553 httpheadersentout
= httpheaders
;
554 elif(httplibuse
=="request"):
556 if(httpmethod
=="GET"):
557 geturls_request
= Request(httpurl
, headers
=httpheaders
);
558 geturls_text
= urlopen(geturls_request
);
559 elif(httpmethod
=="POST"):
560 geturls_request
= Request(httpurl
, headers
=httpheaders
);
561 geturls_text
= urlopen(geturls_request
, data
=postdata
);
563 geturls_request
= Request(httpurl
, headers
=httpheaders
);
564 geturls_text
= urlopen(geturls_request
);
565 except HTTPError
as geturls_text_error
:
566 geturls_text
= geturls_text_error
;
567 log
.info("Error With URL "+httpurl
);
569 log
.info("Error With URL "+httpurl
);
571 except socket
.timeout
:
572 log
.info("Error With URL "+httpurl
);
574 httpcodeout
= geturls_text
.getcode();
575 httpversionout
= "1.1";
576 httpmethodout
= httpmethod
;
577 httpurlout
= geturls_text
.geturl();
578 httpheaderout
= geturls_text
.headers
;
579 httpheadersentout
= httpheaders
;
580 elif(httplibuse
=="request3"):
582 if(httpmethod
=="GET"):
583 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
584 elif(httpmethod
=="POST"):
585 geturls_text
= geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
587 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
588 except urllib3
.exceptions
.ConnectTimeoutError
:
589 log
.info("Error With URL "+httpurl
);
591 except urllib3
.exceptions
.ConnectError
:
592 log
.info("Error With URL "+httpurl
);
594 except urllib3
.exceptions
.MaxRetryError
:
595 log
.info("Error With URL "+httpurl
);
597 except socket
.timeout
:
598 log
.info("Error With URL "+httpurl
);
600 httpcodeout
= geturls_text
.status
;
601 httpversionout
= "1.1";
602 httpmethodout
= httpmethod
;
603 httpurlout
= geturls_text
.geturl();
604 httpheaderout
= geturls_text
.info();
605 httpheadersentout
= httpheaders
;
606 elif(httplibuse
=="httplib"):
607 if(urlparts
[0]=="http"):
608 httpconn
= HTTPConnection(urlparts
[1]);
609 elif(urlparts
[0]=="https"):
610 httpconn
= HTTPSConnection(urlparts
[1]);
613 if(postdata
is not None and not isinstance(postdata
, dict)):
614 postdata
= urlencode(postdata
);
616 if(httpmethod
=="GET"):
617 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
618 elif(httpmethod
=="POST"):
619 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
621 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
622 except socket
.timeout
:
623 log
.info("Error With URL "+httpurl
);
625 except socket
.gaierror
:
626 log
.info("Error With URL "+httpurl
);
628 geturls_text
= httpconn
.getresponse();
629 httpcodeout
= geturls_text
.status
;
630 httpversionout
= "1.1";
631 httpmethodout
= httpmethod
;
632 httpurlout
= httpurl
;
633 httpheaderout
= geturls_text
.getheaders();
634 httpheadersentout
= httpheaders
;
635 elif(httplibuse
=="httplib2"):
636 if(urlparts
[0]=="http"):
637 httpconn
= HTTPConnectionWithTimeout(urlparts
[1]);
638 elif(urlparts
[0]=="https"):
639 httpconn
= HTTPSConnectionWithTimeout(urlparts
[1]);
642 if(postdata
is not None and not isinstance(postdata
, dict)):
643 postdata
= urlencode(postdata
);
645 if(httpmethod
=="GET"):
646 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
647 elif(httpmethod
=="POST"):
648 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
650 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
651 except socket
.timeout
:
652 log
.info("Error With URL "+httpurl
);
654 except socket
.gaierror
:
655 log
.info("Error With URL "+httpurl
);
657 geturls_text
= httpconn
.getresponse();
658 httpcodeout
= geturls_text
.status
;
659 httpversionout
= "1.1";
660 httpmethodout
= httpmethod
;
661 httpurlout
= httpurl
;
662 httpheaderout
= geturls_text
.getheaders();
663 httpheadersentout
= httpheaders
;
664 elif(httplibuse
=="urllib3"):
666 if(httpmethod
=="GET"):
667 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
668 elif(httpmethod
=="POST"):
669 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
671 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
672 except urllib3
.exceptions
.ConnectTimeoutError
:
673 log
.info("Error With URL "+httpurl
);
675 except urllib3
.exceptions
.ConnectError
:
676 log
.info("Error With URL "+httpurl
);
678 except urllib3
.exceptions
.MaxRetryError
:
679 log
.info("Error With URL "+httpurl
);
681 except socket
.timeout
:
682 log
.info("Error With URL "+httpurl
);
684 httpcodeout
= geturls_text
.status
;
685 httpversionout
= "1.1";
686 httpmethodout
= httpmethod
;
687 httpurlout
= geturls_text
.geturl();
688 httpheaderout
= geturls_text
.info();
689 httpheadersentout
= httpheaders
;
690 elif(httplibuse
=="requests"):
692 if(httpmethod
=="GET"):
693 geturls_text
= requests
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
694 elif(httpmethod
=="POST"):
695 geturls_text
= requests
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
697 geturls_text
= requests
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
698 except requests
.exceptions
.ConnectTimeout
:
699 log
.info("Error With URL "+httpurl
);
701 except requests
.exceptions
.ConnectError
:
702 log
.info("Error With URL "+httpurl
);
704 except socket
.timeout
:
705 log
.info("Error With URL "+httpurl
);
707 httpcodeout
= geturls_text
.status_code
;
708 httpversionout
= "1.1";
709 httpmethodout
= httpmethod
;
710 httpurlout
= geturls_text
.url
;
711 httpheaderout
= geturls_text
.headers
;
712 httpheadersentout
= httpheaders
;
713 elif(httplibuse
=="httpx"):
715 if(httpmethod
=="GET"):
716 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
717 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
718 elif(httpmethod
=="POST"):
719 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
720 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
722 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
723 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
724 except httpx
.ConnectTimeout
:
725 log
.info("Error With URL "+httpurl
);
727 except httpx
.ConnectError
:
728 log
.info("Error With URL "+httpurl
);
730 except socket
.timeout
:
731 log
.info("Error With URL "+httpurl
);
733 httpcodeout
= geturls_text
.status_code
;
734 httpversionout
= geturls_text
.http_version
;
735 httpmethodout
= httpmethod
;
736 httpurlout
= str(geturls_text
.url
);
737 httpheaderout
= geturls_text
.headers
;
738 httpheadersentout
= httpheaders
;
739 elif(httplibuse
=="httpx2"):
741 if(httpmethod
=="GET"):
742 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
743 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
744 elif(httpmethod
=="POST"):
745 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
746 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
748 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
749 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
750 except httpx
.ConnectTimeout
:
751 log
.info("Error With URL "+httpurl
);
753 except httpx
.ConnectError
:
754 log
.info("Error With URL "+httpurl
);
756 except socket
.timeout
:
757 log
.info("Error With URL "+httpurl
);
759 httpcodeout
= geturls_text
.status_code
;
760 httpversionout
= geturls_text
.http_version
;
761 httpmethodout
= httpmethod
;
762 httpurlout
= str(geturls_text
.url
);
763 httpheaderout
= geturls_text
.headers
;
764 httpheadersentout
= httpheaders
;
765 elif(httplibuse
=="httpcore"):
767 if(httpmethod
=="GET"):
768 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
769 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
770 elif(httpmethod
=="POST"):
771 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
772 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
774 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
775 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
776 except httpcore
.ConnectTimeout
:
777 log
.info("Error With URL "+httpurl
);
779 except httpcore
.ConnectError
:
780 log
.info("Error With URL "+httpurl
);
782 except socket
.timeout
:
783 log
.info("Error With URL "+httpurl
);
785 httpcodeout
= geturls_text
.status
;
786 httpversionout
= "1.1";
787 httpmethodout
= httpmethod
;
788 httpurlout
= str(httpurl
);
789 httpheaderout
= geturls_text
.headers
;
790 httpheadersentout
= httpheaders
;
791 elif(httplibuse
=="httpcore2"):
793 if(httpmethod
=="GET"):
794 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
795 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
796 elif(httpmethod
=="POST"):
797 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
798 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
800 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
801 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
802 except httpcore
.ConnectTimeout
:
803 log
.info("Error With URL "+httpurl
);
805 except httpcore
.ConnectError
:
806 log
.info("Error With URL "+httpurl
);
808 except socket
.timeout
:
809 log
.info("Error With URL "+httpurl
);
811 httpcodeout
= geturls_text
.status
;
812 httpversionout
= "1.1";
813 httpmethodout
= httpmethod
;
814 httpurlout
= str(httpurl
);
815 httpheaderout
= geturls_text
.headers
;
816 httpheadersentout
= httpheaders
;
817 elif(httplibuse
=="mechanize"):
818 geturls_opener
= mechanize
.Browser();
819 if(isinstance(httpheaders
, dict)):
820 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
822 geturls_opener
.addheaders
= httpheaders
;
823 geturls_opener
.set_cookiejar(httpcookie
);
824 geturls_opener
.set_handle_robots(False);
825 if(postdata
is not None and not isinstance(postdata
, dict)):
826 postdata
= urlencode(postdata
);
828 if(httpmethod
=="GET"):
829 geturls_text
= geturls_opener
.open(httpurl
);
830 elif(httpmethod
=="POST"):
831 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
833 geturls_text
= geturls_opener
.open(httpurl
);
834 except mechanize
.HTTPError
as geturls_text_error
:
835 geturls_text
= geturls_text_error
;
836 log
.info("Error With URL "+httpurl
);
838 log
.info("Error With URL "+httpurl
);
840 except socket
.timeout
:
841 log
.info("Error With URL "+httpurl
);
843 httpcodeout
= geturls_text
.code
;
844 httpversionout
= "1.1";
845 httpmethodout
= httpmethod
;
846 httpurlout
= geturls_text
.geturl();
847 httpheaderout
= geturls_text
.info();
848 reqhead
= geturls_opener
.request
;
849 httpheadersentout
= reqhead
.header_items();
851 elif(httplibuse
=="ftp"):
852 geturls_text
= download_file_from_ftp_file(httpurl
);
853 if(not geturls_text
):
855 log
.info("Downloading URL "+httpurl
);
856 returnval_content
= geturls_text
.read()[:];
857 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
858 geturls_text
.close();
859 elif(httplibuse
=="sftp"):
860 geturls_text
= download_file_from_sftp_file(httpurl
);
861 if(not geturls_text
):
863 log
.info("Downloading URL "+httpurl
);
864 returnval_content
= geturls_text
.read()[:];
865 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
866 geturls_text
.close();
868 elif(httplibuse
=="pysftp"):
869 geturls_text
= download_file_from_pysftp_file(httpurl
);
870 if(not geturls_text
):
872 log
.info("Downloading URL "+httpurl
);
873 returnval_content
= geturls_text
.read()[:];
874 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
875 geturls_text
.close();
879 if(isinstance(httpheaderout
, list)):
880 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
881 if(sys
.version
[0]=="2"):
883 prehttpheaderout
= httpheaderout
;
884 httpheaderkeys
= httpheaderout
.keys();
885 imax
= len(httpheaderkeys
);
889 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
891 except AttributeError:
893 httpheaderout
= fix_header_names(httpheaderout
);
894 if(isinstance(httpheadersentout
, list)):
895 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
896 httpheadersentout
= fix_header_names(httpheadersentout
);
897 log
.info("Downloading URL "+httpurl
);
898 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="request3" or httplibuse
=="httplib" or httplibuse
=="httplib2" or httplibuse
=="urllib3" or httplibuse
=="mechanize" or httplibuse
=="httpx" or httplibuse
=="httpx2" or httplibuse
=="httpcore" or httplibuse
=="httpcore2"):
899 if(httpheaderout
.get("Content-Encoding")=="gzip" or httpheaderout
.get("Content-Encoding")=="deflate"):
900 if(sys
.version
[0]=="2"):
901 strbuf
= StringIO(geturls_text
.read());
902 if(sys
.version
[0]>="3"):
903 strbuf
= BytesIO(geturls_text
.read());
904 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
905 returnval_content
= gzstrbuf
.read()[:];
906 if(httpheaderout
.get("Content-Encoding")!="gzip" and httpheaderout
.get("Content-Encoding")!="deflate" and httpheaderout
.get("Content-Encoding")!="br"):
907 returnval_content
= geturls_text
.read()[:];
908 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
909 returnval_content
= geturls_text
.read()[:];
910 returnval_content
= brotli
.decompress(returnval_content
);
911 geturls_text
.close();
912 elif(httplibuse
=="requests"):
913 log
.info("Downloading URL "+httpurl
);
914 if(httpheaderout
.get("Content-Encoding")=="gzip" or httpheaderout
.get("Content-Encoding")=="deflate"):
915 if(sys
.version
[0]=="2"):
916 strbuf
= StringIO(geturls_text
.raw
.read());
917 if(sys
.version
[0]>="3"):
918 strbuf
= BytesIO(geturls_text
.raw
.read());
919 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
920 returnval_content
= gzstrbuf
.read()[:];
921 if(httpheaderout
.get("Content-Encoding")!="gzip" and httpheaderout
.get("Content-Encoding")!="deflate" and httpheaderout
.get("Content-Encoding")!="br"):
922 returnval_content
= geturls_text
.raw
.read()[:];
923 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
924 returnval_content
= geturls_text
.raw
.read()[:];
925 returnval_content
= brotli
.decompress(returnval_content
);
926 geturls_text
.close();
927 elif(httplibuse
=="ftp" or httplibuse
=="sftp" or httplibuse
=="pysftp"):
931 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
};
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1):
    """Download httpurl into a uniquely-named temporary file and return a dict
    describing the download.

    Parameters
    ----------
    httpurl : str
        URL to fetch (http/https, or ftp/sftp URLs for the ftp backends).
    httpheaders : dict or list
        Request headers; a list is converted to a dict.
    httpuseragent, httpreferer : str or None
        Optional User-Agent / Referer header overrides.
    httpcookie : cookie jar used by the cookie-aware backends.
    httpmethod : "GET" or "POST" (anything else falls back to GET).
    postdata : POST body; url-encoded when given as a non-dict.
    httplibuse : backend name ("urllib", "request", "request3", "httplib",
        "httplib2", "urllib3", "requests", "httpx", "httpx2", "httpcore",
        "httpcore2", "mechanize", "ftp", "sftp", "pysftp").  Unavailable
        backends silently fall back as in the rest of this module.
    buffersize : int
        Read chunk size in bytes.
    sleep : int
        Pre-request delay in seconds; negative means use the module default.

    Returns
    -------
    dict with keys 'Type', 'Filename', 'Filesize', 'FilesizeAlt', 'Headers',
    'Version', 'Method', 'HeadersSent', 'URL', 'Code', 'DownloadTime',
    'DownloadTimeReadable', or False on connection failure.

    NOTE(review): this body was reconstructed from a whitespace-mangled
    source in which every ``try:``/``else:`` line had been dropped; the
    restored control flow follows the identical per-backend pattern visible
    in the sibling functions.  Fixes applied are marked with ``# FIX:``.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix, haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    # Normalize backend aliases, then fall back when a library is missing.
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib"
    if(httplibuse=="httplib1"):
        httplibuse = "httplib"
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib"
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib"
    if(not haveparamiko and httplibuse=="sftp"):
        # No paramiko: an sftp URL cannot be fetched by any other backend.
        return False
    # FIX: originally tested haveparamiko; pysftp availability is tracked
    # by havepysftp (as the sibling download_from_url_to_file does).
    if(not havepysftp and httplibuse=="pysftp"):
        return False
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    httpheaders = fix_header_names(httpheaders)
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent
        else:
            # FIX: was httpuseragent.update({...}) — a str has no update();
            # the intent is clearly to add the header to httpheaders.
            httpheaders.update({'User-Agent': httpuseragent})
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer
        else:
            # FIX: was httpuseragent.update({...}) — same defect as above.
            httpheaders.update({'Referer': httpreferer})
    if(urlparts.username is not None or urlparts.password is not None):
        # Inline credentials become an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="mechanize"):
        # These backends take headers as a list of (name, value) pairs.
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders)
        geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    # Defaults so the shared post-processing below never sees unbound names
    # (the ftp/sftp/pysftp branches produce no HTTP metadata).
    httpcodeout = None
    httpversionout = None
    httpmethodout = None
    httpurlout = httpurl
    httpheaderout = {}
    httpheadersentout = {}
    downloadsize = None
    if(httplibuse=="urllib"):
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl)
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata)
            else:
                geturls_text = geturls_opener.open(httpurl)
        except HTTPError as geturls_text_error:
            # HTTP error responses still carry a usable body/headers.
            geturls_text = geturls_text_error
            log.info("Error With URL "+httpurl)
        except URLError:
            log.info("Error With URL "+httpurl)
            return False
        # FIX: the original had two identical "except socket.timeout"
        # clauses back to back; the second was unreachable.
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getcode()
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.geturl()
        httpheaderout = geturls_text.info()
        httpheadersentout = httpheaders
    elif(httplibuse=="request"):
        try:
            if(httpmethod=="GET"):
                geturls_request = Request(httpurl, headers=httpheaders)
                geturls_text = urlopen(geturls_request)
            elif(httpmethod=="POST"):
                geturls_request = Request(httpurl, headers=httpheaders)
                geturls_text = urlopen(geturls_request, data=postdata)
            else:
                geturls_request = Request(httpurl, headers=httpheaders)
                geturls_text = urlopen(geturls_request)
        except HTTPError as geturls_text_error:
            geturls_text = geturls_text_error
            log.info("Error With URL "+httpurl)
        except URLError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.getcode()
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.geturl()
        httpheaderout = geturls_text.headers
        httpheadersentout = httpheaders
    elif(httplibuse=="request3"):
        try:
            # FIX: removed duplicated "geturls_text = geturls_text = ..."
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
            else:
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl)
            return False
        except urllib3.exceptions.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.geturl()
        httpheaderout = geturls_text.info()
        httpheadersentout = httpheaders
    elif(httplibuse=="httplib"):
        if(urlparts[0]=="http"):
            httpconn = HTTPConnection(urlparts[1])
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnection(urlparts[1])
        else:
            return False
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders)
            elif(httpmethod=="POST"):
                # FIX: was request("GET", ...) while sending a POST body.
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        geturls_text = httpconn.getresponse()
        httpcodeout = geturls_text.status
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = httpurl
        httpheaderout = geturls_text.getheaders()
        httpheadersentout = httpheaders
    elif(httplibuse=="httplib2"):
        # NOTE(review): connection construction was lost in the mangled
        # source; reconstructed with the httplib2 timeout connection classes
        # imported at the top of this file — confirm against history.
        if(urlparts[0]=="http"):
            httpconn = HTTPConnectionWithTimeout(urlparts[1])
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnectionWithTimeout(urlparts[1])
        else:
            return False
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders)
            elif(httpmethod=="POST"):
                # FIX: was request("GET", ...) while sending a POST body.
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders)
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        except socket.gaierror:
            log.info("Error With URL "+httpurl)
            return False
        geturls_text = httpconn.getresponse()
        httpcodeout = geturls_text.status
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = httpurl
        httpheaderout = geturls_text.getheaders()
        httpheadersentout = httpheaders
    elif(httplibuse=="urllib3"):
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
            elif(httpmethod=="POST"):
                # FIX: was urlopen("GET", ...) while sending a POST body.
                geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
            else:
                geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl)
            return False
        except urllib3.exceptions.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.geturl()
        httpheaderout = geturls_text.info()
        httpheadersentout = httpheaders
    elif(httplibuse=="requests"):
        try:
            # stream=True so the body is read in chunks below.
            if(httpmethod=="GET"):
                geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
            elif(httpmethod=="POST"):
                geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True)
            else:
                geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        except requests.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl)
            return False
        except requests.exceptions.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status_code
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.url
        httpheaderout = geturls_text.headers
        httpheadersentout = httpheaders
    elif(httplibuse=="httpx"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
                geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
            else:
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True)
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl)
            return False
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status_code
        httpversionout = geturls_text.http_version
        httpmethodout = httpmethod
        httpurlout = str(geturls_text.url)
        httpheaderout = geturls_text.headers
        httpheadersentout = httpheaders
    elif(httplibuse=="httpx2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
                geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
            else:
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True)
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie)
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl)
            return False
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status_code
        httpversionout = geturls_text.http_version
        httpmethodout = httpmethod
        httpurlout = str(geturls_text.url)
        httpheaderout = geturls_text.headers
        httpheadersentout = httpheaders
    elif(httplibuse=="httpcore"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
                # FIX: was request("GET", ...) while sending a POST body.
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False)
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl)
            return False
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = str(httpurl)
        httpheaderout = geturls_text.headers
        httpheadersentout = httpheaders
    elif(httplibuse=="httpcore2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
                # FIX: was request("GET", ...) while sending a POST body.
                geturls_text = httpx_pool.request("POST", httpurl, data=postdata, headers=httpheaders)
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True)
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders)
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl)
            return False
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.status
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = str(httpurl)
        httpheaderout = geturls_text.headers
        httpheadersentout = httpheaders
    elif(httplibuse=="mechanize"):
        geturls_opener = mechanize.Browser()
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders)
        geturls_opener.addheaders = httpheaders
        geturls_opener.set_cookiejar(httpcookie)
        geturls_opener.set_handle_robots(False)
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata)
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl)
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata)
            else:
                geturls_text = geturls_opener.open(httpurl)
        except mechanize.HTTPError as geturls_text_error:
            geturls_text = geturls_text_error
            log.info("Error With URL "+httpurl)
        except URLError:
            log.info("Error With URL "+httpurl)
            return False
        except socket.timeout:
            log.info("Error With URL "+httpurl)
            return False
        httpcodeout = geturls_text.code
        httpversionout = "1.1"
        httpmethodout = httpmethod
        httpurlout = geturls_text.geturl()
        httpheaderout = geturls_text.info()
        reqhead = geturls_opener.request
        httpheadersentout = reqhead.header_items()
    elif(httplibuse=="ftp"):
        geturls_text = download_file_from_ftp_file(httpurl)
        if(not geturls_text):
            return False
        # The helper returns a seekable file object; size it by seeking.
        geturls_text.seek(0, 2)
        downloadsize = geturls_text.tell()
        geturls_text.seek(0, 0)
    elif(httplibuse=="sftp"):
        geturls_text = download_file_from_sftp_file(httpurl)
        if(not geturls_text):
            return False
        geturls_text.seek(0, 2)
        downloadsize = geturls_text.tell()
        geturls_text.seek(0, 0)
    elif(httplibuse=="pysftp"):
        geturls_text = download_file_from_pysftp_file(httpurl)
        if(not geturls_text):
            return False
        geturls_text.seek(0, 2)
        downloadsize = geturls_text.tell()
        geturls_text.seek(0, 0)
    else:
        return False
    # Normalize response headers to a plain dict with canonical names.
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout))
    if(sys.version[0]=="2"):
        # Python 2: some header objects are mimetools.Message; rebuild as
        # a plain dict key by key (AttributeError means already a dict).
        try:
            prehttpheaderout = httpheaderout
            httpheaderkeys = httpheaderout.keys()
            imax = len(httpheaderkeys)
            ic = 0
            httpheaderout = {}
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]})
                ic += 1
        except AttributeError:
            pass
    httpheaderout = fix_header_names(httpheaderout)
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout))
    httpheadersentout = fix_header_names(httpheadersentout)
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="requests" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2"):
        downloadsize = httpheaderout.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2" or httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            # Stamp the temp file with the server's Last-Modified time when
            # parseable; silently keep the current mtime otherwise.
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (TypeError, ValueError):
                    pass
            except (TypeError, ValueError):
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
            while True:
                databytes = geturls_text.read(buffersize)
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    # Guard avoids ZeroDivisionError when Content-Length is absent.
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
    elif(httplibuse=="requests"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())))
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
                except (TypeError, ValueError):
                    pass
            except (TypeError, ValueError):
                pass
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout}
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
    else:
        return False
    geturls_text.close()
    exec_time_end = time.time()
    # FIX: elapsed time was computed as start - end (a negative duration);
    # report end - start for both the log line and the returned fields.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl and either save it as outpath/outfile or, when
    outfile is "-", return its content in memory.

    Falls back to the "urllib" (or "httplib") backend when the requested
    HTTP backend library is not installed.  Returns a result dict on
    success or False on failure.

    NOTE(review): buffersize is a mutable default; it is only read here,
    but callers should not mutate the returned default.
    """
    # BUGFIX: the global list previously contained the typo
    # "havehttpcorei" while the code below tests "havehttpcore".
    global geturls_download_sleep, haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp
    if(sleep < 0):
        sleep = geturls_download_sleep
    # Normalize backend aliases.
    if(httplibuse == "urllib1" or httplibuse == "urllib2"):
        httplibuse = "urllib"
    if(httplibuse == "httplib1"):
        httplibuse = "httplib"
    # Fall back to an available backend when the requested one is missing.
    if(not haverequests and httplibuse == "requests"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx"):
        httplibuse = "urllib"
    if(not havehttpx and httplibuse == "httpx2"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore"):
        httplibuse = "urllib"
    if(not havehttpcore and httplibuse == "httpcore2"):
        httplibuse = "urllib"
    if(not havemechanize and httplibuse == "mechanize"):
        httplibuse = "urllib"
    if(not havehttplib2 and httplibuse == "httplib2"):
        httplibuse = "httplib"
    # SFTP backends have no HTTP fallback; fail outright.
    if(not haveparamiko and httplibuse == "sftp"):
        return False
    if(not havepysftp and httplibuse == "pysftp"):
        return False
    returnval = False
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        # Preserve the server's Last-Modified timestamp on the saved file.
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())))
        except AttributeError:
            # parsedate_to_datetime is unavailable (old Python); fall back
            # to strptime on the RFC 1123 date format.
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())))
            except (TypeError, ValueError):
                pass
        except (TypeError, ValueError):
            pass
        exec_time_end = time.time()
        # BUGFIX: elapsed time was computed as start - end (negative).
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        # BUGFIX: the dict previously contained a duplicate 'Method' key;
        # the second value (httpmethod) won, so only it is kept.
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] == "2"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-" and sys.version[0] >= "3"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': httpmethod, 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "urllib" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", sleep)
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "request" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", sleep)
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "request3" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", sleep)
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "httplib" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", sleep)
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "httplib2" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", sleep)
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "urllib3" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", sleep)
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "requests" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", sleep)
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "httpx" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", sleep)
def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "httpx2" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", sleep)
def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "httpcore" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", sleep)
def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "httpcore2" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", sleep)
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "mechanize" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", sleep)
def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "ftp" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", sleep)
def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "sftp" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", sleep)
def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Wrapper for download_from_url() pinned to the "pysftp" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", sleep)
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "urllib" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", buffersize, sleep)
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "request" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", buffersize, sleep)
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "request3" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", buffersize, sleep)
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "httplib" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", buffersize, sleep)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "httplib2" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", buffersize, sleep)
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "urllib3" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", buffersize, sleep)
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "requests" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", buffersize, sleep)
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "httpx" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", buffersize, sleep)
def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "httpx2" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", buffersize, sleep)
def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "httpcore" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", buffersize, sleep)
def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "httpcore2" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", buffersize, sleep)
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "mechanize" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", buffersize, sleep)
def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "ftp" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", buffersize, sleep)
def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "sftp" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", buffersize, sleep)
def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Wrapper for download_from_url_file() pinned to the "pysftp" backend."""
    # BUGFIX: the result was assigned to a local but never returned.
    return download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", buffersize, sleep)
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "urllib" backend."""
    # BUGFIX: previously called download_from_url_file (which has no
    # outfile/outpath parameters) and never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "request" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "request3" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "httplib" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "httplib2" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "urllib3" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "requests" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "httpx" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "httpx2" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "httpcore" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "httpcore2" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "mechanize" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "ftp" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "sftp" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", outfile, outpath, buffersize, sleep)
def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Wrapper for download_from_url_to_file() pinned to the "pysftp" backend."""
    # BUGFIX: previously called download_from_url_file (wrong target) and
    # never returned the result.
    return download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", outfile, outpath, buffersize, sleep)
def download_file_from_ftp_file(url):
    """Fetch an ftp:// or ftps:// URL and return its contents as a
    seekable BytesIO object, or False on failure."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUGFIX: the log line referenced undefined 'httpurl' (NameError).
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    # NOTE(review): login uses the raw URL credentials rather than the
    # ftp_username/ftp_password fallbacks computed above — confirm intent.
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme == "ftps"):
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR " + urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Fetch the FTP resource at url and return its raw contents."""
    buf = download_file_from_ftp_file(url)
    return buf.read()
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to an ftp:// or ftps:// URL.

    Returns the (rewound) file object on success or False on failure.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    ftp_port = urlparts.port
    if(urlparts.port is None):
        ftp_port = 21
    try:
        ftp.connect(urlparts.hostname, ftp_port)
    except socket.gaierror:
        # BUGFIX: the log line referenced undefined 'httpurl' (NameError).
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    # NOTE(review): login uses the raw URL credentials rather than the
    # ftp_username/ftp_password fallbacks computed above — confirm intent.
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme == "ftps"):
        ftp.prot_p()
    ftp.storbinary("STOR " + urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string ftpstring to an FTP URL; returns the result
    of upload_file_to_ftp_file()."""
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    # BUGFIX: the result was never returned.
    return ftpfile
def download_file_from_sftp_file(url):
    """Fetch an sftp:// URL via paramiko and return its contents as a
    seekable BytesIO object, or False on failure."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        # Default SSH port when the URL does not carry one.
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): connects with the raw URL credentials rather than
        # the sftp_username/sftp_password fallbacks — confirm intent.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: the log line referenced undefined 'httpurl' (NameError).
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
1859 def download_file_from_sftp_file(url
):
def download_file_from_sftp_string(url):
    """Fetch the SFTP resource at url and return its raw contents."""
    buf = download_file_from_sftp_file(url)
    return buf.read()
1867 def download_file_from_ftp_string(url
):
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to an sftp:// URL via
    paramiko.  Returns the (rewound) file object on success or False."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    if(urlparts.port is None):
        # Default SSH port when the URL does not carry one.
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): connects with the raw URL credentials rather than
        # the sftp_username/sftp_password fallbacks — confirm intent.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: the log line referenced undefined 'httpurl' (NameError).
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
1914 def upload_file_to_sftp_file(sftpfile
, url
):
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the byte string sftpstring to an SFTP URL; returns the
    result of upload_file_to_sftp_file()."""
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: previously called undefined 'upload_file_to_sftp_files'
    # with undefined 'ftpfileo', and never returned the result.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
1924 def upload_file_to_sftp_string(url
):
def download_file_from_pysftp_file(url):
    """Fetch an sftp:// URL via pysftp and return its contents as a
    seekable BytesIO object, or False on failure."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    sftp_port = urlparts.port
    if(urlparts.port is None):
        # Default SSH port when the URL does not carry one.
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    try:
        # BUGFIX: the connection object was previously discarded and an
        # undefined 'ssh' variable was used afterwards.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: the log line referenced undefined 'httpurl' (NameError).
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
1970 def download_file_from_pysftp_file(url
):
def download_file_from_pysftp_string(url):
    """Fetch the SFTP resource at url (via pysftp) and return its raw
    contents."""
    buf = download_file_from_pysftp_file(url)
    return buf.read()
1978 def download_file_from_ftp_string(url
):
def upload_file_to_pysftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to an sftp:// URL via pysftp.
    Returns the (rewound) file object on success or False."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    sftp_port = urlparts.port
    if(urlparts.scheme == "http" or urlparts.scheme == "https"):
        return False
    if(urlparts.port is None):
        # Default SSH port when the URL does not carry one.
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    try:
        # BUGFIX: the connection object was previously discarded and an
        # undefined 'ssh' variable was used afterwards.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    except socket.gaierror:
        # BUGFIX: the log line referenced undefined 'httpurl' (NameError).
        log.info("Error With URL " + url)
        return False
    except socket.timeout:
        log.info("Error With URL " + url)
        return False
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    sftpfile.seek(0, 0)
    return sftpfile
2022 def upload_file_to_pysftp_file(sftpfile
, url
):
def upload_file_to_pysftp_string(sftpstring, url):
    """Upload the byte string sftpstring to an SFTP URL (via pysftp);
    returns the result of upload_file_to_pysftp_file()."""
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: previously called undefined 'upload_file_to_pysftp_files'
    # with undefined 'ftpfileo', and never returned the result.
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
2032 def upload_file_to_pysftp_string(url
):