4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/24/2023 Ver. 1.5.0 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
, email
.utils
, datetime
, time
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
55 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
83 if(sys
.version
[0]=="2"):
85 from cStringIO
import StringIO
;
87 from StringIO
import StringIO
;
88 # From http://python-future.org/compatible_idioms.html
89 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
90 from urllib
import urlencode
;
91 from urllib
import urlopen
as urlopenalt
;
92 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
93 import urlparse
, cookielib
;
94 from httplib
import HTTPConnection
, HTTPSConnection
;
95 if(sys
.version
[0]>="3"):
96 from io
import StringIO
, BytesIO
;
97 # From http://python-future.org/compatible_idioms.html
98 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
99 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
100 from urllib
.error
import HTTPError
, URLError
;
101 import urllib
.parse
as urlparse
;
102 import http
.cookiejar
as cookielib
;
103 from http
.client
import HTTPConnection
, HTTPSConnection
;
105 __program_name__
= "PyWWW-Get";
106 __program_alt_name__
= "PyWWWGet";
107 __program_small_name__
= "wwwget";
108 __project__
= __program_name__
;
109 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
110 __version_info__
= (1, 5, 0, "RC 1", 1);
111 __version_date_info__
= (2023, 9, 24, "RC 1", 1);
112 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
113 __revision__
= __version_info__
[3];
114 __revision_id__
= "$Id$";
115 if(__version_info__
[4] is not None):
116 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
117 if(__version_info__
[4] is None):
118 __version_date_plusrc__
= __version_date__
;
119 if(__version_info__
[3] is not None):
120 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
121 if(__version_info__
[3] is None):
122 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
124 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
126 pytempdir
= tempfile
.gettempdir();
128 PyBitness
= platform
.architecture();
129 if(PyBitness
=="32bit" or PyBitness
=="32"):
131 elif(PyBitness
=="64bit" or PyBitness
=="64"):
136 compression_supported
= "gzip, deflate";
138 compression_supported
= "gzip, deflate, br";
140 compression_supported
= "gzip, deflate";
142 geturls_cj
= cookielib
.CookieJar();
143 windowsNT4_ua_string
= "Windows NT 4.0";
144 windowsNT4_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "4.0.0"};
145 windows2k_ua_string
= "Windows NT 5.0";
146 windows2k_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.0.0"};
147 windowsXP_ua_string
= "Windows NT 5.1";
148 windowsXP_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM': "5.1.0"};
149 windowsXP64_ua_string
= "Windows NT 5.2; Win64; x64";
150 windowsXP64_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "5.1.0"};
151 windows7_ua_string
= "Windows NT 6.1; Win64; x64";
152 windows7_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.1.0"};
153 windows8_ua_string
= "Windows NT 6.2; Win64; x64";
154 windows8_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.2.0"};
155 windows81_ua_string
= "Windows NT 6.3; Win64; x64";
156 windows81_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "6.3.0"};
157 windows10_ua_string
= "Windows NT 10.0; Win64; x64";
158 windows10_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "10.0.0"};
159 windows11_ua_string
= "Windows NT 11.0; Win64; x64";
160 windows11_ua_addon
= {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM': "11.0.0"};
161 geturls_ua_firefox_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:109.0) Gecko/20100101 Firefox/117.0";
162 geturls_ua_seamonkey_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
163 geturls_ua_chrome_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
164 geturls_ua_chromium_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
165 geturls_ua_palemoon_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
166 geturls_ua_opera_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
167 geturls_ua_vivaldi_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
168 geturls_ua_internet_explorer_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+"; Trident/7.0; rv:11.0) like Gecko";
169 geturls_ua_microsoft_edge_windows7
= "Mozilla/5.0 ("+windows7_ua_string
+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
170 geturls_ua_pywwwget_python
= "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname
=__project__
, prover
=__version__
, prourl
=__project_url__
);
171 if(platform
.python_implementation()!=""):
172 py_implementation
= platform
.python_implementation();
173 if(platform
.python_implementation()==""):
174 py_implementation
= "Python";
175 geturls_ua_pywwwget_python_alt
= "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver
=platform
.system()+" "+platform
.release(), archtype
=platform
.machine(), prourl
=__project_url__
, pyimp
=py_implementation
, pyver
=platform
.python_version(), proname
=__project__
, prover
=__version__
);
176 geturls_ua_googlebot_google
= "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
177 geturls_ua_googlebot_google_old
= "Googlebot/2.1 (+http://www.google.com/bot.html)";
178 geturls_ua
= geturls_ua_firefox_windows7
;
179 geturls_headers_firefox_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
180 geturls_headers_seamonkey_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
181 geturls_headers_chrome_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
182 geturls_headers_chrome_windows7
.update(windows7_ua_addon
);
183 geturls_headers_chromium_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
184 geturls_headers_chromium_windows7
.update(windows7_ua_addon
);
185 geturls_headers_palemoon_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
186 geturls_headers_opera_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
187 geturls_headers_opera_windows7
.update(windows7_ua_addon
);
188 geturls_headers_vivaldi_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
189 geturls_headers_vivaldi_windows7
.update(windows7_ua_addon
);
190 geturls_headers_internet_explorer_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
191 geturls_headers_microsoft_edge_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"}
192 geturls_headers_microsoft_edge_windows7
.update(windows7_ua_addon
);
193 geturls_headers_pywwwget_python
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__
+"\";v=\""+str(__version__
)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation
+"\";v=\""+str(platform
.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__
), 'SEC-CH-UA-PLATFORM': ""+py_implementation
+"", 'SEC-CH-UA-ARCH': ""+platform
.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__
), 'SEC-CH-UA-BITNESS': str(PyBitness
)};
194 geturls_headers_pywwwget_python_alt
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__
+"\";v=\""+str(__version__
)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation
+"\";v=\""+str(platform
.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__
), 'SEC-CH-UA-PLATFORM': ""+py_implementation
+"", 'SEC-CH-UA-ARCH': ""+platform
.machine()+"", 'SEC-CH-UA-PLATFORM': str(__version__
), 'SEC-CH-UA-BITNESS': str(PyBitness
)};
195 geturls_headers_googlebot_google
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
196 geturls_headers_googlebot_google_old
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
197 geturls_headers
= geturls_headers_firefox_windows7
;
198 geturls_download_sleep
= 0;
200 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
201 if(outtype
=="print" and dbgenable
):
204 elif(outtype
=="log" and dbgenable
):
205 logging
.info(dbgtxt
);
207 elif(outtype
=="warning" and dbgenable
):
208 logging
.warning(dbgtxt
);
210 elif(outtype
=="error" and dbgenable
):
211 logging
.error(dbgtxt
);
213 elif(outtype
=="critical" and dbgenable
):
214 logging
.critical(dbgtxt
);
216 elif(outtype
=="exception" and dbgenable
):
217 logging
.exception(dbgtxt
);
219 elif(outtype
=="logalt" and dbgenable
):
220 logging
.log(dgblevel
, dbgtxt
);
222 elif(outtype
=="debug" and dbgenable
):
223 logging
.debug(dbgtxt
);
231 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
232 dbgout
= verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
237 def add_url_param(url
, **params
):
239 parts
= list(urlparse
.urlsplit(url
));
240 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
242 parts
[n
]=urlencode(d
);
243 return urlparse
.urlunsplit(parts
);
245 os
.environ
["PATH"] = os
.environ
["PATH"] + os
.pathsep
+ os
.path
.dirname(os
.path
.realpath(__file__
)) + os
.pathsep
+ os
.getcwd();
246 def which_exec(execfile):
247 for path
in os
.environ
["PATH"].split(":"):
248 if os
.path
.exists(path
+ "/" + execfile):
249 return path
+ "/" + execfile;
251 def listize(varlist
):
259 newlistreg
.update({ilx
: varlist
[il
]});
260 newlistrev
.update({varlist
[il
]: ilx
});
263 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
266 def twolistize(varlist
):
276 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
277 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
278 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
279 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
282 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
283 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
284 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
287 def arglistize(proexec
, *varlist
):
291 newarglist
= [proexec
];
293 if varlist
[il
][0] is not None:
294 newarglist
.append(varlist
[il
][0]);
295 if varlist
[il
][1] is not None:
296 newarglist
.append(varlist
[il
][1]);
300 def fix_header_names(header_dict
):
301 header_dict
= {k
.title(): v
for k
, v
in header_dict
.items()};
304 # hms_string by ArcGIS Python Recipes
305 # https://arcpy.wordpress.com/2012/04/20/146/
306 def hms_string(sec_elapsed
):
307 h
= int(sec_elapsed
/ (60 * 60));
308 m
= int((sec_elapsed
% (60 * 60)) / 60);
309 s
= sec_elapsed
% 60.0;
310 return "{}:{:>02}:{:>05.2f}".format(h
, m
, s
);
312 # get_readable_size by Lipis
313 # http://stackoverflow.com/posts/14998888/revisions
314 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
316 if(unit
!="IEC" and unit
!="SI"):
319 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
320 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
323 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
324 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
329 if abs(bytes
) < unitsize
:
330 strformat
= "%3."+str(precision
)+"f%s";
331 pre_return_val
= (strformat
% (bytes
, unit
));
332 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
333 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
334 alt_return_val
= pre_return_val
.split();
335 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
338 strformat
= "%."+str(precision
)+"f%s";
339 pre_return_val
= (strformat
% (bytes
, "YiB"));
340 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
341 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
342 alt_return_val
= pre_return_val
.split();
343 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
346 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
348 usehashtypes
= usehashtypes
.lower();
349 getfilesize
= os
.path
.getsize(infile
);
350 return_val
= get_readable_size(getfilesize
, precision
, unit
);
352 hashtypelist
= usehashtypes
.split(",");
353 openfile
= open(infile
, "rb");
354 filecontents
= openfile
.read();
357 listnumend
= len(hashtypelist
);
358 while(listnumcount
< listnumend
):
359 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
360 hashtypelistup
= hashtypelistlow
.upper();
361 filehash
= hashlib
.new(hashtypelistup
);
362 filehash
.update(filecontents
);
363 filegethash
= filehash
.hexdigest();
364 return_val
.update({hashtypelistup
: filegethash
});
368 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
370 usehashtypes
= usehashtypes
.lower();
371 getfilesize
= len(instring
);
372 return_val
= get_readable_size(getfilesize
, precision
, unit
);
374 hashtypelist
= usehashtypes
.split(",");
376 listnumend
= len(hashtypelist
);
377 while(listnumcount
< listnumend
):
378 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
379 hashtypelistup
= hashtypelistlow
.upper();
380 filehash
= hashlib
.new(hashtypelistup
);
381 if(sys
.version
[0]=="2"):
382 filehash
.update(instring
);
383 if(sys
.version
[0]>="3"):
384 filehash
.update(instring
.encode('utf-8'));
385 filegethash
= filehash
.hexdigest();
386 return_val
.update({hashtypelistup
: filegethash
});
390 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
391 if isinstance(headers
, dict):
393 if(sys
.version
[0]=="2"):
394 for headkey
, headvalue
in headers
.iteritems():
395 returnval
.append((headkey
, headvalue
));
396 if(sys
.version
[0]>="3"):
397 for headkey
, headvalue
in headers
.items():
398 returnval
.append((headkey
, headvalue
));
399 elif isinstance(headers
, list):
405 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
406 if isinstance(headers
, dict):
408 if(sys
.version
[0]=="2"):
409 for headkey
, headvalue
in headers
.iteritems():
410 returnval
.append(headkey
+": "+headvalue
);
411 if(sys
.version
[0]>="3"):
412 for headkey
, headvalue
in headers
.items():
413 returnval
.append(headkey
+": "+headvalue
);
414 elif isinstance(headers
, list):
420 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
421 if isinstance(headers
, list):
426 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
428 elif isinstance(headers
, dict):
434 def get_httplib_support(checkvalue
=None):
435 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
437 returnval
.append("ftp");
438 returnval
.append("httplib");
440 returnval
.append("httplib2");
441 returnval
.append("urllib");
443 returnval
.append("urllib3");
444 returnval
.append("request3");
445 returnval
.append("request");
447 returnval
.append("requests");
449 returnval
.append("httpx");
450 returnval
.append("httpx2");
452 returnval
.append("mechanize");
454 returnval
.append("sftp");
456 returnval
.append("pysftp");
457 if(not checkvalue
is None):
458 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
459 checkvalue
= "urllib";
460 if(checkvalue
=="httplib1"):
461 checkvalue
= "httplib";
462 if(checkvalue
in returnval
):
468 def check_httplib_support(checkvalue
="urllib"):
469 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
470 checkvalue
= "urllib";
471 if(checkvalue
=="httplib1"):
472 checkvalue
= "httplib";
473 returnval
= get_httplib_support(checkvalue
);
476 def get_httplib_support_list():
477 returnval
= get_httplib_support(None);
480 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
481 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
483 sleep
= geturls_download_sleep
;
484 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
485 httplibuse
= "urllib";
486 if(httplibuse
=="httplib1"):
487 httplibuse
= "httplib";
488 if(not haverequests
and httplibuse
=="requests"):
489 httplibuse
= "urllib";
490 if(not havehttpx
and httplibuse
=="httpx"):
491 httplibuse
= "urllib";
492 if(not havehttpx
and httplibuse
=="httpx2"):
493 httplibuse
= "urllib";
494 if(not havehttpcore
and httplibuse
=="httpcore"):
495 httplibuse
= "urllib";
496 if(not havehttpcore
and httplibuse
=="httpcore2"):
497 httplibuse
= "urllib";
498 if(not havemechanize
and httplibuse
=="mechanize"):
499 httplibuse
= "urllib";
500 if(not havehttplib2
and httplibuse
=="httplib2"):
501 httplibuse
= "httplib";
502 if(not haveparamiko
and httplibuse
=="sftp"):
504 if(not havepysftp
and httplibuse
=="pysftp"):
506 urlparts
= urlparse
.urlparse(httpurl
);
507 if(isinstance(httpheaders
, list)):
508 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
509 httpheaders
= fix_header_names(httpheaders
);
510 if(httpuseragent
is not None):
511 if('User-Agent' in httpheaders
):
512 httpheaders
['User-Agent'] = httpuseragent
;
514 httpuseragent
.update({'User-Agent': httpuseragent
});
515 if(httpreferer
is not None):
516 if('Referer' in httpheaders
):
517 httpheaders
['Referer'] = httpreferer
;
519 httpuseragent
.update({'Referer': httpreferer
});
520 if(urlparts
.username
is not None or urlparts
.password
is not None):
521 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
522 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
523 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
524 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="mechanize"):
525 if(isinstance(httpheaders
, dict)):
526 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
527 geturls_opener
.addheaders
= httpheaders
;
529 if(postdata
is not None and not isinstance(postdata
, dict)):
530 postdata
= urlencode(postdata
);
531 if(httplibuse
=="urllib"):
533 if(httpmethod
=="GET"):
534 geturls_text
= geturls_opener
.open(httpurl
);
535 elif(httpmethod
=="POST"):
536 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
538 geturls_text
= geturls_opener
.open(httpurl
);
539 except HTTPError
as geturls_text_error
:
540 geturls_text
= geturls_text_error
;
541 log
.info("Error With URL "+httpurl
);
543 log
.info("Error With URL "+httpurl
);
545 except socket
.timeout
:
546 log
.info("Error With URL "+httpurl
);
548 httpcodeout
= geturls_text
.getcode();
549 httpcodereason
= geturls_text
.reason
;
550 httpversionout
= "1.1";
551 httpmethodout
= httpmethod
;
552 httpurlout
= geturls_text
.geturl();
553 httpheaderout
= geturls_text
.info();
554 httpheadersentout
= httpheaders
;
555 elif(httplibuse
=="request"):
557 if(httpmethod
=="GET"):
558 geturls_request
= Request(httpurl
, headers
=httpheaders
);
559 geturls_text
= urlopen(geturls_request
);
560 elif(httpmethod
=="POST"):
561 geturls_request
= Request(httpurl
, headers
=httpheaders
);
562 geturls_text
= urlopen(geturls_request
, data
=postdata
);
564 geturls_request
= Request(httpurl
, headers
=httpheaders
);
565 geturls_text
= urlopen(geturls_request
);
566 except HTTPError
as geturls_text_error
:
567 geturls_text
= geturls_text_error
;
568 log
.info("Error With URL "+httpurl
);
570 log
.info("Error With URL "+httpurl
);
572 except socket
.timeout
:
573 log
.info("Error With URL "+httpurl
);
575 httpcodeout
= geturls_text
.getcode();
576 httpcodereason
= geturls_text
.reason
;
577 httpversionout
= "1.1";
578 httpmethodout
= httpmethod
;
579 httpurlout
= geturls_text
.geturl();
580 httpheaderout
= geturls_text
.headers
;
581 httpheadersentout
= httpheaders
;
582 elif(httplibuse
=="request3"):
584 if(httpmethod
=="GET"):
585 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
586 elif(httpmethod
=="POST"):
587 geturls_text
= geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
589 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
590 except urllib3
.exceptions
.ConnectTimeoutError
:
591 log
.info("Error With URL "+httpurl
);
593 except urllib3
.exceptions
.ConnectError
:
594 log
.info("Error With URL "+httpurl
);
596 except urllib3
.exceptions
.MaxRetryError
:
597 log
.info("Error With URL "+httpurl
);
599 except socket
.timeout
:
600 log
.info("Error With URL "+httpurl
);
602 httpcodeout
= geturls_text
.status
;
603 httpcodereason
= geturls_text
.reason
;
604 httpversionout
= "1.1";
605 httpmethodout
= httpmethod
;
606 httpurlout
= geturls_text
.geturl();
607 httpheaderout
= geturls_text
.info();
608 httpheadersentout
= httpheaders
;
609 elif(httplibuse
=="httplib"):
610 if(urlparts
[0]=="http"):
611 httpconn
= HTTPConnection(urlparts
[1]);
612 elif(urlparts
[0]=="https"):
613 httpconn
= HTTPSConnection(urlparts
[1]);
616 if(postdata
is not None and not isinstance(postdata
, dict)):
617 postdata
= urlencode(postdata
);
619 if(httpmethod
=="GET"):
620 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
621 elif(httpmethod
=="POST"):
622 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
624 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
625 except socket
.timeout
:
626 log
.info("Error With URL "+httpurl
);
628 except socket
.gaierror
:
629 log
.info("Error With URL "+httpurl
);
631 geturls_text
= httpconn
.getresponse();
632 httpcodeout
= geturls_text
.status
;
633 httpcodereason
= geturls_text
.reason
;
634 httpversionout
= "1.1";
635 httpmethodout
= httpmethod
;
636 httpurlout
= httpurl
;
637 httpheaderout
= geturls_text
.getheaders();
638 httpheadersentout
= httpheaders
;
639 elif(httplibuse
=="httplib2"):
640 if(urlparts
[0]=="http"):
641 httpconn
= HTTPConnectionWithTimeout(urlparts
[1]);
642 elif(urlparts
[0]=="https"):
643 httpconn
= HTTPSConnectionWithTimeout(urlparts
[1]);
646 if(postdata
is not None and not isinstance(postdata
, dict)):
647 postdata
= urlencode(postdata
);
649 if(httpmethod
=="GET"):
650 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
651 elif(httpmethod
=="POST"):
652 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
654 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
655 except socket
.timeout
:
656 log
.info("Error With URL "+httpurl
);
658 except socket
.gaierror
:
659 log
.info("Error With URL "+httpurl
);
661 geturls_text
= httpconn
.getresponse();
662 httpcodeout
= geturls_text
.status
;
663 httpcodereason
= geturls_text
.reason
;
664 httpversionout
= "1.1";
665 httpmethodout
= httpmethod
;
666 httpurlout
= httpurl
;
667 httpheaderout
= geturls_text
.getheaders();
668 httpheadersentout
= httpheaders
;
669 elif(httplibuse
=="urllib3"):
671 if(httpmethod
=="GET"):
672 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
673 elif(httpmethod
=="POST"):
674 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
676 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
677 except urllib3
.exceptions
.ConnectTimeoutError
:
678 log
.info("Error With URL "+httpurl
);
680 except urllib3
.exceptions
.ConnectError
:
681 log
.info("Error With URL "+httpurl
);
683 except urllib3
.exceptions
.MaxRetryError
:
684 log
.info("Error With URL "+httpurl
);
686 except socket
.timeout
:
687 log
.info("Error With URL "+httpurl
);
689 httpcodeout
= geturls_text
.status
;
690 httpcodereason
= geturls_text
.reason
;
691 httpversionout
= "1.1";
692 httpmethodout
= httpmethod
;
693 httpurlout
= geturls_text
.geturl();
694 httpheaderout
= geturls_text
.info();
695 httpheadersentout
= httpheaders
;
696 elif(httplibuse
=="requests"):
698 reqsession
= requests
.Session();
699 if(httpmethod
=="GET"):
700 geturls_text
= reqsession
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
701 elif(httpmethod
=="POST"):
702 geturls_text
= reqsession
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
704 geturls_text
= reqsession
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
705 except requests
.exceptions
.ConnectTimeout
:
706 log
.info("Error With URL "+httpurl
);
708 except requests
.exceptions
.ConnectError
:
709 log
.info("Error With URL "+httpurl
);
711 except socket
.timeout
:
712 log
.info("Error With URL "+httpurl
);
714 httpcodeout
= geturls_text
.status_code
;
715 httpcodereason
= geturls_text
.reason
;
716 httpversionout
= "1.1";
717 httpmethodout
= httpmethod
;
718 httpurlout
= geturls_text
.url
;
719 httpheaderout
= geturls_text
.headers
;
720 httpheadersentout
= geturls_text
.request
.headers
;
721 elif(httplibuse
=="httpx"):
723 if(httpmethod
=="GET"):
724 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
725 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
726 elif(httpmethod
=="POST"):
727 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
728 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
730 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
731 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
732 except httpx
.ConnectTimeout
:
733 log
.info("Error With URL "+httpurl
);
735 except httpx
.ConnectError
:
736 log
.info("Error With URL "+httpurl
);
738 except socket
.timeout
:
739 log
.info("Error With URL "+httpurl
);
741 httpcodeout
= geturls_text
.status_code
;
742 httpcodereason
= geturls_text
.reason
;
743 httpversionout
= geturls_text
.http_version
;
744 httpmethodout
= httpmethod
;
745 httpurlout
= str(geturls_text
.url
);
746 httpheaderout
= geturls_text
.headers
;
747 httpheadersentout
= geturls_text
.request
.headers
;
748 elif(httplibuse
=="httpx2"):
750 if(httpmethod
=="GET"):
751 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
752 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
753 elif(httpmethod
=="POST"):
754 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
755 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
757 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
758 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
759 except httpx
.ConnectTimeout
:
760 log
.info("Error With URL "+httpurl
);
762 except httpx
.ConnectError
:
763 log
.info("Error With URL "+httpurl
);
765 except socket
.timeout
:
766 log
.info("Error With URL "+httpurl
);
768 httpcodeout
= geturls_text
.status_code
;
769 httpcodereason
= geturls_text
.reason
;
770 httpversionout
= geturls_text
.http_version
;
771 httpmethodout
= httpmethod
;
772 httpurlout
= str(geturls_text
.url
);
773 httpheaderout
= geturls_text
.headers
;
774 httpheadersentout
= geturls_text
.request
.headers
;
775 elif(httplibuse
=="httpcore"):
777 if(httpmethod
=="GET"):
778 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
779 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
780 elif(httpmethod
=="POST"):
781 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
782 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
784 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
785 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
786 except httpcore
.ConnectTimeout
:
787 log
.info("Error With URL "+httpurl
);
789 except httpcore
.ConnectError
:
790 log
.info("Error With URL "+httpurl
);
792 except socket
.timeout
:
793 log
.info("Error With URL "+httpurl
);
795 httpcodeout
= geturls_text
.status
;
796 httpcodereason
= geturls_text
.reason
;
797 httpversionout
= "1.1";
798 httpmethodout
= httpmethod
;
799 httpurlout
= str(httpurl
);
800 httpheaderout
= geturls_text
.headers
;
801 httpheadersentout
= httpheaders
;
802 elif(httplibuse
=="httpcore2"):
804 if(httpmethod
=="GET"):
805 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
806 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
807 elif(httpmethod
=="POST"):
808 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
809 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
811 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
812 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
813 except httpcore
.ConnectTimeout
:
814 log
.info("Error With URL "+httpurl
);
816 except httpcore
.ConnectError
:
817 log
.info("Error With URL "+httpurl
);
819 except socket
.timeout
:
820 log
.info("Error With URL "+httpurl
);
822 httpcodeout
= geturls_text
.status
;
823 httpcodereason
= geturls_text
.reason
;
824 httpversionout
= "1.1";
825 httpmethodout
= httpmethod
;
826 httpurlout
= str(httpurl
);
827 httpheaderout
= geturls_text
.headers
;
828 httpheadersentout
= httpheaders
;
829 elif(httplibuse
=="mechanize"):
830 geturls_opener
= mechanize
.Browser();
831 if(isinstance(httpheaders
, dict)):
832 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
834 geturls_opener
.addheaders
= httpheaders
;
835 geturls_opener
.set_cookiejar(httpcookie
);
836 geturls_opener
.set_handle_robots(False);
837 if(postdata
is not None and not isinstance(postdata
, dict)):
838 postdata
= urlencode(postdata
);
840 if(httpmethod
=="GET"):
841 geturls_text
= geturls_opener
.open(httpurl
);
842 elif(httpmethod
=="POST"):
843 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
845 geturls_text
= geturls_opener
.open(httpurl
);
846 except mechanize
.HTTPError
as geturls_text_error
:
847 geturls_text
= geturls_text_error
;
848 log
.info("Error With URL "+httpurl
);
850 log
.info("Error With URL "+httpurl
);
852 except socket
.timeout
:
853 log
.info("Error With URL "+httpurl
);
855 httpcodeout
= geturls_text
.code
;
856 httpcodereason
= geturls_text
.reason
;
857 httpversionout
= "1.1";
858 httpmethodout
= httpmethod
;
859 httpurlout
= geturls_text
.geturl();
860 httpheaderout
= geturls_text
.info();
861 reqhead
= geturls_opener
.request
;
862 httpheadersentout
= reqhead
.header_items();
864 elif(httplibuse
=="ftp"):
865 geturls_text
= download_file_from_ftp_file(httpurl
);
866 if(not geturls_text
):
868 log
.info("Downloading URL "+httpurl
);
869 returnval_content
= geturls_text
.read()[:];
870 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
871 geturls_text
.close();
872 elif(httplibuse
=="sftp"):
873 geturls_text
= download_file_from_sftp_file(httpurl
);
874 if(not geturls_text
):
876 log
.info("Downloading URL "+httpurl
);
877 returnval_content
= geturls_text
.read()[:];
878 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
879 geturls_text
.close();
881 elif(httplibuse
=="pysftp"):
882 geturls_text
= download_file_from_pysftp_file(httpurl
);
883 if(not geturls_text
):
885 log
.info("Downloading URL "+httpurl
);
886 returnval_content
= geturls_text
.read()[:];
887 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl
, 'Code': None};
888 geturls_text
.close();
892 if(isinstance(httpheaderout
, list)):
893 httpheaderout
= dict(make_http_headers_from_list_to_dict(httpheaderout
));
894 if(sys
.version
[0]=="2"):
896 prehttpheaderout
= httpheaderout
;
897 httpheaderkeys
= httpheaderout
.keys();
898 imax
= len(httpheaderkeys
);
902 httpheaderout
.update({httpheaderkeys
[ic
]: prehttpheaderout
[httpheaderkeys
[ic
]]});
904 except AttributeError:
906 httpheaderout
= fix_header_names(httpheaderout
);
907 if(isinstance(httpheadersentout
, list)):
908 httpheadersentout
= dict(make_http_headers_from_list_to_dict(httpheadersentout
));
909 httpheadersentout
= fix_header_names(httpheadersentout
);
910 log
.info("Downloading URL "+httpurl
);
911 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="request3" or httplibuse
=="httplib" or httplibuse
=="httplib2" or httplibuse
=="urllib3" or httplibuse
=="mechanize" or httplibuse
=="httpx" or httplibuse
=="httpx2" or httplibuse
=="httpcore" or httplibuse
=="httpcore2"):
912 if(httpheaderout
.get("Content-Encoding")=="gzip" or httpheaderout
.get("Content-Encoding")=="deflate"):
913 if(sys
.version
[0]=="2"):
914 strbuf
= StringIO(geturls_text
.read());
915 if(sys
.version
[0]>="3"):
916 strbuf
= BytesIO(geturls_text
.read());
917 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
918 returnval_content
= gzstrbuf
.read()[:];
919 if(httpheaderout
.get("Content-Encoding")!="gzip" and httpheaderout
.get("Content-Encoding")!="deflate" and httpheaderout
.get("Content-Encoding")!="br"):
920 returnval_content
= geturls_text
.read()[:];
921 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
922 returnval_content
= geturls_text
.read()[:];
923 returnval_content
= brotli
.decompress(returnval_content
);
924 geturls_text
.close();
925 elif(httplibuse
=="requests"):
926 log
.info("Downloading URL "+httpurl
);
927 if(httpheaderout
.get("Content-Encoding")=="gzip" or httpheaderout
.get("Content-Encoding")=="deflate"):
928 if(sys
.version
[0]=="2"):
929 strbuf
= StringIO(geturls_text
.raw
.read());
930 if(sys
.version
[0]>="3"):
931 strbuf
= BytesIO(geturls_text
.raw
.read());
932 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
933 returnval_content
= gzstrbuf
.read()[:];
934 if(httpheaderout
.get("Content-Encoding")!="gzip" and httpheaderout
.get("Content-Encoding")!="deflate" and httpheaderout
.get("Content-Encoding")!="br"):
935 returnval_content
= geturls_text
.raw
.read()[:];
936 if(httpheaderout
.get("Content-Encoding")=="br" and havebrotli
):
937 returnval_content
= geturls_text
.raw
.read()[:];
938 returnval_content
= brotli
.decompress(returnval_content
);
939 geturls_text
.close();
940 elif(httplibuse
=="ftp" or httplibuse
=="sftp" or httplibuse
=="pysftp"):
944 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': httpheaderout
, 'Version': httpversionout
, 'Method': httpmethodout
, 'HeadersSent': httpheadersentout
, 'URL': httpurlout
, 'Code': httpcodeout
, 'Reason': httpcodereason
};
947 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1):
948 global geturls_download_sleep
, tmpfileprefix
, tmpfilesuffix
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
949 exec_time_start
= time
.time();
950 myhash
= hashlib
.new("sha1");
951 if(sys
.version
[0]=="2"):
952 myhash
.update(httpurl
);
953 myhash
.update(str(buffersize
));
954 myhash
.update(str(exec_time_start
));
955 if(sys
.version
[0]>="3"):
956 myhash
.update(httpurl
.encode('utf-8'));
957 myhash
.update(str(buffersize
).encode('utf-8'));
958 myhash
.update(str(exec_time_start
).encode('utf-8'));
959 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
961 sleep
= geturls_download_sleep
;
962 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
963 httplibuse
= "urllib";
964 if(httplibuse
=="httplib1"):
965 httplibuse
= "httplib";
966 if(not haverequests
and httplibuse
=="requests"):
967 httplibuse
= "urllib";
968 if(not havehttpx
and httplibuse
=="httpx"):
969 httplibuse
= "urllib";
970 if(not havehttpx
and httplibuse
=="httpx2"):
971 httplibuse
= "urllib";
972 if(not havehttpcore
and httplibuse
=="httpcore"):
973 httplibuse
= "urllib";
974 if(not havehttpcore
and httplibuse
=="httpcore2"):
975 httplibuse
= "urllib";
976 if(not havemechanize
and httplibuse
=="mechanize"):
977 httplibuse
= "urllib";
978 if(not havehttplib2
and httplibuse
=="httplib2"):
979 httplibuse
= "httplib";
980 if(not haveparamiko
and httplibuse
=="sftp"):
982 if(not haveparamiko
and httplibuse
=="pysftp"):
984 urlparts
= urlparse
.urlparse(httpurl
);
985 if(isinstance(httpheaders
, list)):
986 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
987 httpheaders
= fix_header_names(httpheaders
);
988 if(httpuseragent
is not None):
989 if('User-Agent' in httpheaders
):
990 httpheaders
['User-Agent'] = httpuseragent
;
992 httpuseragent
.update({'User-Agent': httpuseragent
});
993 if(httpreferer
is not None):
994 if('Referer' in httpheaders
):
995 httpheaders
['Referer'] = httpreferer
;
997 httpuseragent
.update({'Referer': httpreferer
});
998 if(urlparts
.username
is not None or urlparts
.password
is not None):
999 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
1000 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
1001 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
1002 if(httplibuse
=="urllib" or httplibuse
=="request" or httplibuse
=="mechanize"):
1003 if(isinstance(httpheaders
, dict)):
1004 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
1005 geturls_opener
.addheaders
= httpheaders
;
1007 if(httplibuse
=="urllib"):
1009 if(httpmethod
=="GET"):
1010 geturls_text
= geturls_opener
.open(httpurl
);
1011 elif(httpmethod
=="POST"):
1012 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
1014 geturls_text
= geturls_opener
.open(httpurl
);
1015 except HTTPError
as geturls_text_error
:
1016 geturls_text
= geturls_text_error
;
1017 log
.info("Error With URL "+httpurl
);
1019 log
.info("Error With URL "+httpurl
);
1021 except socket
.timeout
:
1022 log
.info("Error With URL "+httpurl
);
1024 except socket
.timeout
:
1025 log
.info("Error With URL "+httpurl
);
1027 httpcodeout
= geturls_text
.getcode();
1028 httpcodereason
= geturls_text
.reason
;
1029 httpversionout
= "1.1";
1030 httpmethodout
= httpmethod
;
1031 httpurlout
= geturls_text
.geturl();
1032 httpheaderout
= geturls_text
.info();
1033 httpheadersentout
= httpheaders
;
1034 elif(httplibuse
=="request"):
1036 if(httpmethod
=="GET"):
1037 geturls_request
= Request(httpurl
, headers
=httpheaders
);
1038 geturls_text
= urlopen(geturls_request
);
1039 elif(httpmethod
=="POST"):
1040 geturls_request
= Request(httpurl
, headers
=httpheaders
);
1041 geturls_text
= urlopen(geturls_request
, data
=postdata
);
1043 geturls_request
= Request(httpurl
, headers
=httpheaders
);
1044 geturls_text
= urlopen(geturls_request
);
1045 except HTTPError
as geturls_text_error
:
1046 geturls_text
= geturls_text_error
;
1047 log
.info("Error With URL "+httpurl
);
1049 log
.info("Error With URL "+httpurl
);
1051 except socket
.timeout
:
1052 log
.info("Error With URL "+httpurl
);
1054 httpcodeout
= geturls_text
.getcode();
1055 httpcodereason
= geturls_text
.reason
;
1056 httpversionout
= "1.1";
1057 httpmethodout
= httpmethod
;
1058 httpurlout
= geturls_text
.geturl();
1059 httpheaderout
= geturls_text
.headers
;
1060 httpheadersentout
= httpheaders
;
1061 elif(httplibuse
=="request3"):
1063 if(httpmethod
=="GET"):
1064 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
1065 elif(httpmethod
=="POST"):
1066 geturls_text
= geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
1068 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
1069 except urllib3
.exceptions
.ConnectTimeoutError
:
1070 log
.info("Error With URL "+httpurl
);
1072 except urllib3
.exceptions
.ConnectError
:
1073 log
.info("Error With URL "+httpurl
);
1075 except urllib3
.exceptions
.MaxRetryError
:
1076 log
.info("Error With URL "+httpurl
);
1078 except socket
.timeout
:
1079 log
.info("Error With URL "+httpurl
);
1081 httpcodeout
= geturls_text
.status
;
1082 httpcodereason
= geturls_text
.reason
;
1083 httpversionout
= "1.1";
1084 httpmethodout
= httpmethod
;
1085 httpurlout
= geturls_text
.geturl();
1086 httpheaderout
= geturls_text
.info();
1087 httpheadersentout
= httpheaders
;
1088 elif(httplibuse
=="httplib"):
1089 if(urlparts
[0]=="http"):
1090 httpconn
= HTTPConnection(urlparts
[1]);
1091 elif(urlparts
[0]=="https"):
1092 httpconn
= HTTPSConnection(urlparts
[1]);
1095 if(postdata
is not None and not isinstance(postdata
, dict)):
1096 postdata
= urlencode(postdata
);
1098 if(httpmethod
=="GET"):
1099 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1100 elif(httpmethod
=="POST"):
1101 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
1103 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1104 except socket
.timeout
:
1105 log
.info("Error With URL "+httpurl
);
1107 except socket
.gaierror
:
1108 log
.info("Error With URL "+httpurl
);
1110 geturls_text
= httpconn
.getresponse();
1111 httpcodeout
= geturls_text
.status
;
1112 httpcodereason
= geturls_text
.reason
;
1113 httpversionout
= "1.1";
1114 httpmethodout
= httpmethod
;
1115 httpurlout
= httpurl
;
1116 httpheaderout
= geturls_text
.getheaders();
1117 httpheadersentout
= httpheaders
;
1118 elif(httplibuse
=="httplib2"):
1120 if(httpmethod
=="GET"):
1121 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1122 elif(httpmethod
=="POST"):
1123 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
1125 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1126 except socket
.timeout
:
1127 log
.info("Error With URL "+httpurl
);
1129 except socket
.gaierror
:
1130 log
.info("Error With URL "+httpurl
);
1132 geturls_text
= httpconn
.getresponse();
1133 httpcodeout
= geturls_text
.status
;
1134 httpcodereason
= geturls_text
.reason
;
1135 httpversionout
= "1.1";
1136 httpmethodout
= httpmethod
;
1137 httpurlout
= httpurl
;
1138 httpheaderout
= geturls_text
.getheaders();
1139 httpheadersentout
= httpheaders
;
1140 elif(httplibuse
=="urllib3"):
1142 if(httpmethod
=="GET"):
1143 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
1144 elif(httpmethod
=="POST"):
1145 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
1147 geturls_text
= urllib_pool
.urlopen("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
1148 except urllib3
.exceptions
.ConnectTimeoutError
:
1149 log
.info("Error With URL "+httpurl
);
1151 except urllib3
.exceptions
.ConnectError
:
1152 log
.info("Error With URL "+httpurl
);
1154 except urllib3
.exceptions
.MaxRetryError
:
1155 log
.info("Error With URL "+httpurl
);
1157 except socket
.timeout
:
1158 log
.info("Error With URL "+httpurl
);
1160 httpcodeout
= geturls_text
.status
;
1161 httpcodereason
= geturls_text
.reason
;
1162 httpversionout
= "1.1";
1163 httpmethodout
= httpmethod
;
1164 httpurlout
= geturls_text
.geturl();
1165 httpheaderout
= geturls_text
.info();
1166 httpheadersentout
= httpheaders
;
1167 elif(httplibuse
=="requests"):
1169 reqsession
= requests
.Session();
1170 if(httpmethod
=="GET"):
1171 geturls_text
= reqsession
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
, stream
=True);
1172 elif(httpmethod
=="POST"):
1173 geturls_text
= reqsession
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
, stream
=True);
1175 geturls_text
= reqsession
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
, stream
=True);
1176 except requests
.exceptions
.ConnectTimeout
:
1177 log
.info("Error With URL "+httpurl
);
1179 except requests
.exceptions
.ConnectError
:
1180 log
.info("Error With URL "+httpurl
);
1182 except socket
.timeout
:
1183 log
.info("Error With URL "+httpurl
);
1185 httpcodeout
= geturls_text
.status_code
;
1186 httpcodereason
= geturls_text
.reason
;
1187 httpversionout
= "1.1";
1188 httpmethodout
= httpmethod
;
1189 httpurlout
= geturls_text
.url
;
1190 httpheaderout
= geturls_text
.headers
;
1191 httpheadersentout
= geturls_text
.request
.headers
;
1192 elif(httplibuse
=="httpx"):
1194 if(httpmethod
=="GET"):
1195 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
1196 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
1197 elif(httpmethod
=="POST"):
1198 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
1199 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
1201 httpx_pool
= httpx
.Client(http1
=True, http2
=False, trust_env
=True);
1202 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
1203 except httpx
.ConnectTimeout
:
1204 log
.info("Error With URL "+httpurl
);
1206 except httpx
.ConnectError
:
1207 log
.info("Error With URL "+httpurl
);
1209 except socket
.timeout
:
1210 log
.info("Error With URL "+httpurl
);
1212 httpcodeout
= geturls_text
.status_code
;
1213 httpcodereason
= geturls_text
.reason
;
1214 httpversionout
= geturls_text
.http_version
;
1215 httpmethodout
= httpmethod
;
1216 httpurlout
= str(geturls_text
.url
);
1217 httpheaderout
= geturls_text
.headers
;
1218 httpheadersentout
= geturls_text
.request
.headers
;
1219 elif(httplibuse
=="httpx2"):
1221 if(httpmethod
=="GET"):
1222 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
1223 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
1224 elif(httpmethod
=="POST"):
1225 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
1226 geturls_text
= httpx_pool
.post(httpurl
, data
=postdata
, headers
=httpheaders
, cookies
=httpcookie
);
1228 httpx_pool
= httpx
.Client(http1
=True, http2
=True, trust_env
=True);
1229 geturls_text
= httpx_pool
.get(httpurl
, headers
=httpheaders
, cookies
=httpcookie
);
1230 except httpx
.ConnectTimeout
:
1231 log
.info("Error With URL "+httpurl
);
1233 except httpx
.ConnectError
:
1234 log
.info("Error With URL "+httpurl
);
1236 except socket
.timeout
:
1237 log
.info("Error With URL "+httpurl
);
1239 httpcodeout
= geturls_text
.status_code
;
1240 httpcodereason
= geturls_text
.reason
;
1241 httpversionout
= geturls_text
.http_version
;
1242 httpmethodout
= httpmethod
;
1243 httpurlout
= str(geturls_text
.url
);
1244 httpheaderout
= geturls_text
.headers
;
1245 httpheadersentout
= geturls_text
.request
.headers
;
1246 elif(httplibuse
=="httpcore"):
1248 if(httpmethod
=="GET"):
1249 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
1250 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
1251 elif(httpmethod
=="POST"):
1252 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
1253 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
1255 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=False);
1256 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
1257 except httpcore
.ConnectTimeout
:
1258 log
.info("Error With URL "+httpurl
);
1260 except httpcore
.ConnectError
:
1261 log
.info("Error With URL "+httpurl
);
1263 except socket
.timeout
:
1264 log
.info("Error With URL "+httpurl
);
1266 httpcodeout
= geturls_text
.status
;
1267 httpcodereason
= geturls_text
.reason
;
1268 httpversionout
= "1.1";
1269 httpmethodout
= httpmethod
;
1270 httpurlout
= str(httpurl
);
1271 httpheaderout
= geturls_text
.headers
;
1272 httpheadersentout
= httpheaders
;
1273 elif(httplibuse
=="httpcore2"):
1275 if(httpmethod
=="GET"):
1276 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
1277 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
1278 elif(httpmethod
=="POST"):
1279 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
1280 geturls_text
= httpx_pool
.request("GET", httpurl
, data
=postdata
, headers
=httpheaders
);
1282 httpx_pool
= httpcore
.ConnectionPool(http1
=True, http2
=True);
1283 geturls_text
= httpx_pool
.request("GET", httpurl
, headers
=httpheaders
);
1284 except httpcore
.ConnectTimeout
:
1285 log
.info("Error With URL "+httpurl
);
1287 except httpcore
.ConnectError
:
1288 log
.info("Error With URL "+httpurl
);
1290 except socket
.timeout
:
1291 log
.info("Error With URL "+httpurl
);
1293 httpcodeout
= geturls_text
.status
;
1294 httpcodereason
= geturls_text
.reason
;
1295 httpversionout
= "1.1";
1296 httpmethodout
= httpmethod
;
1297 httpurlout
= str(httpurl
);
1298 httpheaderout
= geturls_text
.headers
;
1299 httpheadersentout
= httpheaders
;
    elif(httplibuse=="mechanize"):
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpcodereason = geturls_text.reason;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        reqhead = geturls_opener.request;
        httpheadersentout = reqhead.header_items();
    elif(httplibuse=="ftp"):
        geturls_text = download_file_from_ftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    elif(httplibuse=="sftp"):
        geturls_text = download_file_from_sftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
    elif(httplibuse=="pysftp"):
        geturls_text = download_file_from_pysftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    else:
        return False;
    if(isinstance(httpheaderout, list)):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(sys.version[0]=="2"):
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list)):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    httpheadersentout = fix_header_names(httpheadersentout);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="requests" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2"):
        downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2" or httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    elif(httplibuse=="requests"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    else:
        return False;
    geturls_text.close();
    exec_time_end = time.time();
    # elapsed time is end minus start
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
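# A minimal usage sketch for download_from_url_file() (hypothetical URL; the
# keyword name is assumed to match the positional httplibuse slot the wrappers
# further down pass through):
#   info = download_from_url_file("http://example.com/file.bin", httplibuse="urllib");
#   # info['Filename'] is a temp file kept on disk (delete=False); the caller
#   # moves or removes it, e.g. via download_from_url_to_file() below.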
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    global geturls_download_sleep, haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep < 0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-" and sys.version[0]=="2"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-" and sys.version[0]>="3"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                    downloaddiff = fulldatasize - prevdownsize;
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
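# Minimal sketch of the two outfile modes of download_from_url_to_file()
# (hypothetical URL and names; keyword usage assumed to match the signature above):
#   saved = download_from_url_to_file("http://example.com/a.bin", outfile="a.bin", outpath=os.getcwd());
#   # -> {'Type': "File", 'Filename': ..., 'MoveFileTime': ...}
#   inmem = download_from_url_to_file("http://example.com/a.bin", outfile="-");
#   # -> {'Type': "Content", 'Content': ...} when outfile is "-"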
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", sleep);
    return returnval;

def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", sleep);
    return returnval;

def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", sleep);
    return returnval;

def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", sleep);
    return returnval;

def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", sleep);
    return returnval;

def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", sleep);
    return returnval;

def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", sleep);
    return returnval;

def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", sleep);
    return returnval;

def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", sleep);
    return returnval;

def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", sleep);
    return returnval;

def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", sleep);
    return returnval;

def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", sleep);
    return returnval;

def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", sleep);
    return returnval;

def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", sleep);
    return returnval;

def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", sleep);
    return returnval;
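# The download_from_url_with_*() wrappers above are thin aliases that pin the
# backend argument of download_from_url(); e.g. (hypothetical URL):
#   rsp = download_from_url_with_requests("http://example.com/");
# Like download_from_url_to_file() above, download_from_url() is expected to
# fall back to the urllib backend when the named library is not installed.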
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", buffersize, sleep);
    return returnval;

def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", buffersize, sleep);
    return returnval;

def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", buffersize, sleep);
    return returnval;

def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", buffersize, sleep);
    return returnval;

def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", buffersize, sleep);
    return returnval;

def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", buffersize, sleep);
    return returnval;

def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", buffersize, sleep);
    return returnval;

def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", buffersize, sleep);
    return returnval;

def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", buffersize, sleep);
    return returnval;
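# The *_file wrappers pin the backend of download_from_url_file() and keep its
# single-int buffersize (read chunk in bytes); e.g. (hypothetical URL, 64 KiB
# reads):
#   info = download_from_url_file_with_urllib("http://example.com/big.iso", buffersize=65536);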
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", outfile, outpath, buffersize, sleep);
    return returnval;
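# The *_to_file wrappers take the two-slot buffersize of
# download_from_url_to_file(): [download chunk, copy chunk]; e.g. (hypothetical
# URL and path):
#   download_from_url_to_file_with_requests("http://example.com/a.bin", outfile="a.bin", outpath="/tmp", buffersize=[524288, 524288]);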
def download_file_from_ftp_file(url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;

def download_file_from_ftp_string(url):
    ftpfile = download_file_from_ftp_file(url);
    return ftpfile.read();
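# The FTP helpers read credentials, host, and port from the URL itself and
# default to anonymous login; a sketch with a hypothetical host:
#   fobj = download_file_from_ftp_file("ftp://user:pass@ftp.example.com/pub/file.txt");
#   data = download_file_from_ftp_string("ftp://ftp.example.com/pub/file.txt");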
def upload_file_to_ftp_file(ftpfile, url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;

def upload_file_to_ftp_string(ftpstring, url):
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    ftpfileo.close();
    return ftpfile;
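# Upload mirrors download: the payload is a seekable file-like object, or raw
# bytes for the *_string form (hypothetical host):
#   upload_file_to_ftp_file(BytesIO(b"hello"), "ftp://user:pass@ftp.example.com/inbox/hello.txt");
#   upload_file_to_ftp_string(b"hello", "ftp://user:pass@ftp.example.com/inbox/hello.txt");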
def download_file_from_sftp_file(url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp = ssh.open_sftp();
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not haveparamiko):
    def download_file_from_sftp_file(url):
        return False;

def download_file_from_sftp_string(url):
    sftpfile = download_file_from_sftp_file(url);
    return sftpfile.read();
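# The paramiko-based helpers expect an sftp:// URL, default to port 22, and
# auto-accept unknown host keys (AutoAddPolicy), so pin known_hosts where that
# matters; a sketch with a hypothetical host:
#   fobj = download_file_from_sftp_file("sftp://user:pass@sftp.example.com:22/home/user/file.txt");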
if(not haveparamiko):
    def download_file_from_sftp_string(url):
        return False;

def upload_file_to_sftp_file(sftpfile, url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    ssh = paramiko.SSHClient();
    ssh.load_system_host_keys();
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
    try:
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp = ssh.open_sftp();
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    ssh.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        return False;

def upload_file_to_sftp_string(sftpstring, url):
    sftpfileo = BytesIO(sftpstring);
    sftpfile = upload_file_to_sftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;

if(not haveparamiko):
    def upload_file_to_sftp_string(sftpstring, url):
        return False;
def download_file_from_pysftp_file(url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    sftp_port = urlparts.port;
    if(urlparts.port is None):
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        # pysftp opens the SFTP channel itself, so bind the connection directly.
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftpfile = BytesIO();
    sftp.getfo(urlparts.path, sftpfile);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not havepysftp):
    def download_file_from_pysftp_file(url):
        return False;

def download_file_from_pysftp_string(url):
    sftpfile = download_file_from_pysftp_file(url);
    return sftpfile.read();

if(not havepysftp):
    def download_file_from_pysftp_string(url):
        return False;
def upload_file_to_pysftp_file(sftpfile, url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    sftp_port = urlparts.port;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    if(urlparts.port is None):
        sftp_port = 22;
    else:
        sftp_port = urlparts.port;
    if(urlparts.username is not None):
        sftp_username = urlparts.username;
    else:
        sftp_username = "anonymous";
    if(urlparts.password is not None):
        sftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous";
    else:
        sftp_password = "";
    if(urlparts.scheme!="sftp"):
        return False;
    try:
        sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
    except paramiko.ssh_exception.SSHException:
        return False;
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    sftp.putfo(sftpfile, urlparts.path);
    sftp.close();
    sftpfile.seek(0, 0);
    return sftpfile;

if(not havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        return False;

def upload_file_to_pysftp_string(sftpstring, url):
    sftpfileo = BytesIO(sftpstring);
    sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
    sftpfileo.close();
    return sftpfile;

if(not havepysftp):
    def upload_file_to_pysftp_string(sftpstring, url):
        return False;
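# The pysftp helpers follow the same sftp:// URL contract as the paramiko ones
# above, differing only in the connection object; a sketch (hypothetical host):
#   data = download_file_from_pysftp_string("sftp://user:pass@sftp.example.com/home/user/file.txt");
#   upload_file_to_pysftp_string(b"hello", "sftp://user:pass@sftp.example.com/home/user/hello.txt");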