'''
 This program is free software; you can redistribute it and/or modify
 it under the terms of the Revised BSD License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 Revised BSD License for more details.

 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

 $FileInfo: pywwwget.py - Last Update: 9/30/2023 Ver. 1.7.0 RC 1 - Author: cooldude2k $
'''
from __future__ import division, absolute_import, print_function;
import re, os, sys, hashlib, shutil, platform, tempfile, urllib, gzip, time, argparse, cgi, subprocess, socket, email.utils, datetime;
import logging;
import logging as log;
from ftplib import FTP, FTP_TLS;
from base64 import b64encode;
havemechanize = False;
try:
 import mechanize;
 havemechanize = True;
except ImportError:
 havemechanize = False;
from httplib2 import HTTPConnectionWithTimeout, HTTPSConnectionWithTimeout;
if(sys.version[0]=="2"):
 try:
  from io import StringIO, BytesIO;
 except ImportError:
  try:
   from cStringIO import StringIO;
   from cStringIO import StringIO as BytesIO;
  except ImportError:
   from StringIO import StringIO;
   from StringIO import StringIO as BytesIO;
 # From http://python-future.org/compatible_idioms.html
 from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
 from urllib import urlencode;
 from urllib import urlopen as urlopenalt;
 from urllib2 import urlopen, Request, install_opener, HTTPError, URLError, build_opener, HTTPCookieProcessor;
 import urlparse, cookielib;
 from httplib import HTTPConnection, HTTPSConnection;
if(sys.version[0]>="3"):
 from io import StringIO, BytesIO;
 # From http://python-future.org/compatible_idioms.html
 from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
 from urllib.request import urlopen, Request, install_opener, build_opener, HTTPCookieProcessor;
 from urllib.error import HTTPError, URLError;
 import urllib.parse as urlparse;
 import http.cookiejar as cookielib;
 from http.client import HTTPConnection, HTTPSConnection;
__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
__version_info__ = (1, 7, 0, "RC 1", 1);
__version_date_info__ = (2023, 9, 30, "RC 1", 1);
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
if(__version_info__[4] is not None):
 __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4] is None):
 __version_date_plusrc__ = __version_date__;
if(__version_info__[3] is not None):
 __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3] is None):
 __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
tmpfilesuffix = "-";
pytempdir = tempfile.gettempdir();

PyBitness = platform.architecture()[0];
if(PyBitness=="32bit" or PyBitness=="32"):
 PyBitness = "32";
elif(PyBitness=="64bit" or PyBitness=="64"):
 PyBitness = "64";
else:
 PyBitness = "32";
compression_supported = "gzip, deflate";
if(havebrotli):
 compression_supported = "gzip, deflate, br";
else:
 compression_supported = "gzip, deflate";
geturls_cj = cookielib.CookieJar();
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
if(platform.python_implementation()!=""):
 py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
 py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
geturls_ua = geturls_ua_firefox_windows7;
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
 if(outtype=="print" and dbgenable):
  print(dbgtxt);
  return True;
 elif(outtype=="log" and dbgenable):
  logging.info(dbgtxt);
  return True;
 elif(outtype=="warning" and dbgenable):
  logging.warning(dbgtxt);
  return True;
 elif(outtype=="error" and dbgenable):
  logging.error(dbgtxt);
  return True;
 elif(outtype=="critical" and dbgenable):
  logging.critical(dbgtxt);
  return True;
 elif(outtype=="exception" and dbgenable):
  logging.exception(dbgtxt);
  return True;
 elif(outtype=="logalt" and dbgenable):
  logging.log(dgblevel, dbgtxt);
  return True;
 elif(outtype=="debug" and dbgenable):
  logging.debug(dbgtxt);
  return True;
 elif(not dbgenable):
  return True;
 else:
  return False;
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
 dbgout = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel);
 if(not dbgout):
  return False;
 return dbgtxt;
def add_url_param(url, **params):
 n = 3;
 parts = list(urlparse.urlsplit(url));
 d = dict(cgi.parse_qsl(parts[n])); # use cgi.parse_qs for list values
 d.update(params);
 parts[n] = urlencode(d);
 return urlparse.urlunsplit(parts);
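
# Illustrative example (not part of the original file, using a made-up
# URL): add_url_param merges keyword arguments into a URL's query string.
#   add_url_param("http://example.com/page?a=1", b="2")
#   returns "http://example.com/page?a=1&b=2"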
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
 for path in os.environ["PATH"].split(os.pathsep):
  if os.path.exists(os.path.join(path, execfile)):
   return os.path.join(path, execfile);
def listize(varlist):
 il = 0;
 ix = len(varlist);
 ilx = 1;
 newlistreg = {};
 newlistrev = {};
 while(il < ix):
  newlistreg.update({ilx: varlist[il]});
  newlistrev.update({varlist[il]: ilx});
  ilx = ilx + 1;
  il = il + 1;
 newlistfull = {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev};
 return newlistfull;
def twolistize(varlist):
 il = 0;
 ix = len(varlist);
 ilx = 1;
 newlistnamereg = {};
 newlistnamerev = {};
 newlistdescreg = {};
 newlistdescrev = {};
 while(il < ix):
  newlistnamereg.update({ilx: varlist[il][0].strip()});
  newlistnamerev.update({varlist[il][0].strip(): ilx});
  newlistdescreg.update({ilx: varlist[il][1].strip()});
  newlistdescrev.update({varlist[il][1].strip(): ilx});
  ilx = ilx + 1;
  il = il + 1;
 newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev};
 newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev};
 newlistfull = {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp};
 return newlistfull;
def arglistize(proexec, *varlist):
 il = 0;
 ix = len(varlist);
 newarglist = [proexec];
 while(il < ix):
  if varlist[il][0] is not None:
   newarglist.append(varlist[il][0]);
  if varlist[il][1] is not None:
   newarglist.append(varlist[il][1]);
  il = il + 1;
 return newarglist;
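
# Illustrative example (hypothetical flags, not from the original file):
# arglistize builds an argv-style list for subprocess from (flag, value)
# pairs, skipping None entries.
#   arglistize("/usr/bin/curl", ("-o", "out.html"), ("-s", None))
#   returns ['/usr/bin/curl', '-o', 'out.html', '-s']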
def fix_header_names(header_dict):
 header_dict = {k.title(): v for k, v in header_dict.items()};
 return header_dict;
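
# Illustrative example: fix_header_names normalizes header capitalization.
#   fix_header_names({'user-agent': "Test", 'ACCEPT': "*/*"})
#   returns {'User-Agent': "Test", 'Accept': "*/*"}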
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
 h = int(sec_elapsed / (60 * 60));
 m = int((sec_elapsed % (60 * 60)) / 60);
 s = sec_elapsed % 60.0;
 return "{}:{:>02}:{:>05.2f}".format(h, m, s);
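
# Illustrative example: hms_string formats elapsed seconds as H:MM:SS.ss.
#   hms_string(3725.5) returns "1:02:05.50"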
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
 unit = unit.upper();
 if(unit!="IEC" and unit!="SI"):
  unit = "IEC";
 if(unit=="IEC"):
  units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
  unitswos = ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
  unitsize = 1024.0;
 if(unit=="SI"):
  units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
  unitswos = ["B","kB","MB","GB","TB","PB","EB","ZB"];
  unitsize = 1000.0;
 return_val = {};
 orgbytes = bytes;
 for unit in units:
  if abs(bytes) < unitsize:
   strformat = "%3."+str(precision)+"f%s";
   pre_return_val = (strformat % (bytes, unit));
   pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
   pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
   alt_return_val = pre_return_val.split();
   return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
   return return_val;
  bytes /= unitsize;
 strformat = "%."+str(precision)+"f%s";
 pre_return_val = (strformat % (bytes, "YiB"));
 pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
 pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
 alt_return_val = pre_return_val.split();
 return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
 return return_val;
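
# Illustrative example: get_readable_size converts a byte count into a
# human-readable form (IEC powers of 1024 by default, SI powers of 1000).
#   get_readable_size(1048576, precision=2)['ReadableWithSuffix']
#   returns "1 MiB" (the re.sub calls strip the trailing ".00").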
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
 unit = unit.upper();
 usehashtypes = usehashtypes.lower();
 getfilesize = os.path.getsize(infile);
 return_val = get_readable_size(getfilesize, precision, unit);
 if(usehashes):
  hashtypelist = usehashtypes.split(",");
  openfile = open(infile, "rb");
  filecontents = openfile.read();
  openfile.close();
  listnumcount = 0;
  listnumend = len(hashtypelist);
  while(listnumcount < listnumend):
   hashtypelistlow = hashtypelist[listnumcount].strip();
   hashtypelistup = hashtypelistlow.upper();
   filehash = hashlib.new(hashtypelistup);
   filehash.update(filecontents);
   filegethash = filehash.hexdigest();
   return_val.update({hashtypelistup: filegethash});
   listnumcount += 1;
 return return_val;
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
 unit = unit.upper();
 usehashtypes = usehashtypes.lower();
 getfilesize = len(instring);
 return_val = get_readable_size(getfilesize, precision, unit);
 if(usehashes):
  hashtypelist = usehashtypes.split(",");
  listnumcount = 0;
  listnumend = len(hashtypelist);
  while(listnumcount < listnumend):
   hashtypelistlow = hashtypelist[listnumcount].strip();
   hashtypelistup = hashtypelistlow.upper();
   filehash = hashlib.new(hashtypelistup);
   if(sys.version[0]=="2"):
    filehash.update(instring);
   if(sys.version[0]>="3"):
    filehash.update(instring.encode('utf-8'));
   filegethash = filehash.hexdigest();
   return_val.update({hashtypelistup: filegethash});
   listnumcount += 1;
 return return_val;
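
# Illustrative example: the *_from_string variant sizes and hashes an
# in-memory string instead of a file on disk.
#   get_readable_size_from_string("x" * 2048, usehashes=True, usehashtypes="md5")
#   returns a dict with 'Bytes': 2048, a readable size of "2 KiB", and an
#   'MD5' entry holding the hex digest.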
def http_status_to_reason(code):
 reasons = {
  101: 'Switching Protocols',
  203: 'Non-Authoritative Information',
  205: 'Reset Content',
  206: 'Partial Content',
  208: 'Already Reported',
  300: 'Multiple Choices',
  301: 'Moved Permanently',
  307: 'Temporary Redirect',
  308: 'Permanent Redirect',
  402: 'Payment Required',
  405: 'Method Not Allowed',
  406: 'Not Acceptable',
  407: 'Proxy Authentication Required',
  408: 'Request Timeout',
  411: 'Length Required',
  412: 'Precondition Failed',
  413: 'Payload Too Large',
  415: 'Unsupported Media Type',
  416: 'Range Not Satisfiable',
  417: 'Expectation Failed',
  421: 'Misdirected Request',
  422: 'Unprocessable Entity',
  424: 'Failed Dependency',
  426: 'Upgrade Required',
  428: 'Precondition Required',
  429: 'Too Many Requests',
  431: 'Request Header Fields Too Large',
  451: 'Unavailable For Legal Reasons',
  500: 'Internal Server Error',
  501: 'Not Implemented',
  503: 'Service Unavailable',
  504: 'Gateway Timeout',
  505: 'HTTP Version Not Supported',
  506: 'Variant Also Negotiates',
  507: 'Insufficient Storage',
  508: 'Loop Detected',
  511: 'Network Authentication Required'
 };
 return reasons.get(code, 'Unknown Status Code');
def ftp_status_to_reason(code):
 reasons = {
  110: 'Restart marker reply',
  120: 'Service ready in nnn minutes',
  125: 'Data connection already open; transfer starting',
  150: 'File status okay; about to open data connection',
  202: 'Command not implemented, superfluous at this site',
  211: 'System status, or system help reply',
  212: 'Directory status',
  215: 'NAME system type',
  220: 'Service ready for new user',
  221: 'Service closing control connection',
  225: 'Data connection open; no transfer in progress',
  226: 'Closing data connection',
  227: 'Entering Passive Mode',
  230: 'User logged in, proceed',
  250: 'Requested file action okay, completed',
  257: '"PATHNAME" created',
  331: 'User name okay, need password',
  332: 'Need account for login',
  350: 'Requested file action pending further information',
  421: 'Service not available, closing control connection',
  425: 'Can\'t open data connection',
  426: 'Connection closed; transfer aborted',
  450: 'Requested file action not taken',
  451: 'Requested action aborted. Local error in processing',
  452: 'Requested action not taken. Insufficient storage space in system',
  500: 'Syntax error, command unrecognized',
  501: 'Syntax error in parameters or arguments',
  502: 'Command not implemented',
  503: 'Bad sequence of commands',
  504: 'Command not implemented for that parameter',
  530: 'Not logged in',
  532: 'Need account for storing files',
  550: 'Requested action not taken. File unavailable',
  551: 'Requested action aborted. Page type unknown',
  552: 'Requested file action aborted. Exceeded storage allocation',
  553: 'Requested action not taken. File name not allowed'
 };
 return reasons.get(code, 'Unknown Status Code');
def sftp_status_to_reason(code):
 reasons = {
  0: 'SSH_FX_OK',
  1: 'SSH_FX_EOF',
  2: 'SSH_FX_NO_SUCH_FILE',
  3: 'SSH_FX_PERMISSION_DENIED',
  4: 'SSH_FX_FAILURE',
  5: 'SSH_FX_BAD_MESSAGE',
  6: 'SSH_FX_NO_CONNECTION',
  7: 'SSH_FX_CONNECTION_LOST',
  8: 'SSH_FX_OP_UNSUPPORTED'
 };
 return reasons.get(code, 'Unknown Status Code');
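
# Illustrative example: the *_status_to_reason helpers map numeric codes
# to reason phrases, falling back to 'Unknown Status Code'.
#   http_status_to_reason(429) returns 'Too Many Requests'
#   ftp_status_to_reason(331) returns 'User name okay, need password'
#   sftp_status_to_reason(3) returns 'SSH_FX_PERMISSION_DENIED'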
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
 if isinstance(headers, dict):
  returnval = [];
  if(sys.version[0]=="2"):
   for headkey, headvalue in headers.iteritems():
    returnval.append((headkey, headvalue));
  if(sys.version[0]>="3"):
   for headkey, headvalue in headers.items():
    returnval.append((headkey, headvalue));
 elif isinstance(headers, list):
  returnval = headers;
 else:
  returnval = False;
 return returnval;
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
 if isinstance(headers, dict):
  returnval = [];
  if(sys.version[0]=="2"):
   for headkey, headvalue in headers.iteritems():
    returnval.append(headkey+": "+headvalue);
  if(sys.version[0]>="3"):
   for headkey, headvalue in headers.items():
    returnval.append(headkey+": "+headvalue);
 elif isinstance(headers, list):
  returnval = headers;
 else:
  returnval = False;
 return returnval;
def make_http_headers_from_pycurl_to_dict(headers):
 header_dict = {};
 headers = headers.strip().split('\r\n');
 for header in headers:
  parts = header.split(': ', 1);
  if(len(parts)==2):
   key, value = parts;
   header_dict[key.title()] = value;
 return header_dict;
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
 if isinstance(headers, list):
  returnval = {};
  mli = 0;
  mlix = len(headers);
  while(mli < mlix):
   returnval.update({headers[mli][0]: headers[mli][1]});
   mli = mli + 1;
 elif isinstance(headers, dict):
  returnval = headers;
 else:
  returnval = False;
 return returnval;
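
# Illustrative example: the conversion helpers round-trip header
# containers between dict, list-of-tuples, and pycurl string forms
# (dict ordering assumed, as on Python 3.7+).
#   hdrs = {'User-Agent': "Test", 'Connection': "close"};
#   make_http_headers_from_dict_to_list(hdrs)
#   returns [('User-Agent', 'Test'), ('Connection', 'close')]
#   make_http_headers_from_dict_to_pycurl(hdrs)
#   returns ['User-Agent: Test', 'Connection: close']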
def get_httplib_support(checkvalue=None):
 global haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
 returnval = [];
 returnval.append("ftp");
 returnval.append("httplib");
 if(havehttplib2):
  returnval.append("httplib2");
 returnval.append("urllib");
 if(haveurllib3):
  returnval.append("urllib3");
  returnval.append("request3");
 returnval.append("request");
 if(haverequests):
  returnval.append("requests");
 if(havehttpx):
  returnval.append("httpx");
  returnval.append("httpx2");
 if(havemechanize):
  returnval.append("mechanize");
 if(havepycurl):
  returnval.append("pycurl");
 if(haveparamiko):
  returnval.append("sftp");
 if(havepysftp):
  returnval.append("pysftp");
 if(not checkvalue is None):
  if(checkvalue=="urllib1" or checkvalue=="urllib2"):
   checkvalue = "urllib";
  if(checkvalue=="httplib1"):
   checkvalue = "httplib";
  if(checkvalue in returnval):
   return True;
  else:
   return False;
 return returnval;
def check_httplib_support(checkvalue="urllib"):
 if(checkvalue=="urllib1" or checkvalue=="urllib2"):
  checkvalue = "urllib";
 if(checkvalue=="httplib1"):
  checkvalue = "httplib";
 returnval = get_httplib_support(checkvalue);
 return returnval;
def get_httplib_support_list():
 returnval = get_httplib_support(None);
 return returnval;
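
# Illustrative example: querying which backends are usable at runtime.
#   get_httplib_support_list() might return
#   ['ftp', 'httplib', 'urllib', 'request', 'requests', 'pycurl']
#   depending on which optional modules imported successfully, and
#   check_httplib_support("requests") returns True or False accordingly.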
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", sleep=-1):
 global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(httplibuse=="urllib1" or httplibuse=="urllib2"):
  httplibuse = "urllib";
 if(httplibuse=="httplib1"):
  httplibuse = "httplib";
 if(not haverequests and httplibuse=="requests"):
  httplibuse = "urllib";
 if(not havehttpx and httplibuse=="httpx"):
  httplibuse = "urllib";
 if(not havehttpx and httplibuse=="httpx2"):
  httplibuse = "urllib";
 if(not havehttpcore and httplibuse=="httpcore"):
  httplibuse = "urllib";
 if(not havehttpcore and httplibuse=="httpcore2"):
  httplibuse = "urllib";
 if(not havemechanize and httplibuse=="mechanize"):
  httplibuse = "urllib";
 if(not havepycurl and httplibuse=="pycurl"):
  httplibuse = "urllib";
 if(not havehttplib2 and httplibuse=="httplib2"):
  httplibuse = "httplib";
 if(not haveparamiko and httplibuse=="sftp"):
  httplibuse = "ftp";
 if(not havepysftp and httplibuse=="pysftp"):
  httplibuse = "ftp";
 urlparts = urlparse.urlparse(httpurl);
 if(isinstance(httpheaders, list)):
  httpheaders = make_http_headers_from_list_to_dict(httpheaders);
 httpheaders = fix_header_names(httpheaders);
 if(httpuseragent is not None):
  if('User-Agent' in httpheaders):
   httpheaders['User-Agent'] = httpuseragent;
  else:
   httpheaders.update({'User-Agent': httpuseragent});
 if(httpreferer is not None):
  if('Referer' in httpheaders):
   httpheaders['Referer'] = httpreferer;
  else:
   httpheaders.update({'Referer': httpreferer});
 if(urlparts.username is not None or urlparts.password is not None):
  inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
  httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
 if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="mechanize"):
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
 if(httplibuse=="pycurl"):
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
 geturls_opener.addheaders = httpheaders;
 time.sleep(sleep);
 if(postdata is not None and not isinstance(postdata, dict)):
  postdata = urlencode(postdata);
 if(httplibuse=="urllib"):
  try:
   if(httpmethod=="GET"):
    geturls_text = geturls_opener.open(httpurl);
   elif(httpmethod=="POST"):
    geturls_text = geturls_opener.open(httpurl, data=postdata);
   else:
    geturls_text = geturls_opener.open(httpurl);
  except HTTPError as geturls_text_error:
   geturls_text = geturls_text_error;
   log.info("Error With URL "+httpurl);
  except URLError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.getcode();
  httpcodereason = geturls_text.reason;
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.info();
  httpheadersentout = httpheaders;
 elif(httplibuse=="request"):
  try:
   if(httpmethod=="GET"):
    geturls_request = Request(httpurl, headers=httpheaders);
    geturls_text = urlopen(geturls_request);
   elif(httpmethod=="POST"):
    geturls_request = Request(httpurl, headers=httpheaders);
    geturls_text = urlopen(geturls_request, data=postdata);
   else:
    geturls_request = Request(httpurl, headers=httpheaders);
    geturls_text = urlopen(geturls_request);
  except HTTPError as geturls_text_error:
   geturls_text = geturls_text_error;
   log.info("Error With URL "+httpurl);
  except URLError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.getcode();
  httpcodereason = geturls_text.reason;
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.headers;
  httpheadersentout = httpheaders;
 elif(httplibuse=="request3"):
  urllib_pool = urllib3.PoolManager(headers=httpheaders);
  try:
   if(httpmethod=="GET"):
    geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
   elif(httpmethod=="POST"):
    geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
   else:
    geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
  except urllib3.exceptions.ConnectTimeoutError:
   log.info("Error With URL "+httpurl);
   return False;
  except urllib3.exceptions.NewConnectionError:
   log.info("Error With URL "+httpurl);
   return False;
  except urllib3.exceptions.MaxRetryError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status;
  httpcodereason = geturls_text.reason;
  if(geturls_text.version=="10"):
   httpversionout = "1.0";
  else:
   httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.info();
  httpheadersentout = httpheaders;
 elif(httplibuse=="httplib"):
  if(urlparts[0]=="http"):
   httpconn = HTTPConnection(urlparts[1]);
  elif(urlparts[0]=="https"):
   httpconn = HTTPSConnection(urlparts[1]);
  else:
   return False;
  if(postdata is not None and not isinstance(postdata, dict)):
   postdata = urlencode(postdata);
  try:
   if(httpmethod=="GET"):
    httpconn.request("GET", urlparts[2], headers=httpheaders);
   elif(httpmethod=="POST"):
    httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
   else:
    httpconn.request("GET", urlparts[2], headers=httpheaders);
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.gaierror:
   log.info("Error With URL "+httpurl);
   return False;
  geturls_text = httpconn.getresponse();
  httpcodeout = geturls_text.status;
  httpcodereason = geturls_text.reason;
  if(geturls_text.version=="10"):
   httpversionout = "1.0";
  else:
   httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = httpurl;
  httpheaderout = geturls_text.getheaders();
  httpheadersentout = httpheaders;
 elif(httplibuse=="httplib2"):
  if(urlparts[0]=="http"):
   httpconn = HTTPConnectionWithTimeout(urlparts[1]);
  elif(urlparts[0]=="https"):
   httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
  else:
   return False;
  if(postdata is not None and not isinstance(postdata, dict)):
   postdata = urlencode(postdata);
  try:
   if(httpmethod=="GET"):
    httpconn.request("GET", urlparts[2], headers=httpheaders);
   elif(httpmethod=="POST"):
    httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
   else:
    httpconn.request("GET", urlparts[2], headers=httpheaders);
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.gaierror:
   log.info("Error With URL "+httpurl);
   return False;
  geturls_text = httpconn.getresponse();
  httpcodeout = geturls_text.status;
  httpcodereason = geturls_text.reason;
  if(geturls_text.version=="10"):
   httpversionout = "1.0";
  else:
   httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = httpurl;
  httpheaderout = geturls_text.getheaders();
  httpheadersentout = httpheaders;
 elif(httplibuse=="urllib3"):
  urllib_pool = urllib3.PoolManager(headers=httpheaders);
  try:
   if(httpmethod=="GET"):
    geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
   elif(httpmethod=="POST"):
    geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
   else:
    geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
  except urllib3.exceptions.ConnectTimeoutError:
   log.info("Error With URL "+httpurl);
   return False;
  except urllib3.exceptions.NewConnectionError:
   log.info("Error With URL "+httpurl);
   return False;
  except urllib3.exceptions.MaxRetryError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status;
  httpcodereason = geturls_text.reason;
  if(geturls_text.version=="10"):
   httpversionout = "1.0";
  else:
   httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.info();
  httpheadersentout = httpheaders;
 elif(httplibuse=="requests"):
  try:
   reqsession = requests.Session();
   if(httpmethod=="GET"):
    geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie);
   elif(httpmethod=="POST"):
    geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
   else:
    geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie);
  except requests.exceptions.ConnectTimeout:
   log.info("Error With URL "+httpurl);
   return False;
  except requests.exceptions.ConnectionError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status_code;
  httpcodereason = geturls_text.reason;
  if(geturls_text.raw.version=="10"):
   httpversionout = "1.0";
  else:
   httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.url;
  httpheaderout = geturls_text.headers;
  httpheadersentout = geturls_text.request.headers;
 elif(httplibuse=="httpx"):
  try:
   if(httpmethod=="GET"):
    httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
    geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
   elif(httpmethod=="POST"):
    httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
    geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
   else:
    httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
    geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
  except httpx.ConnectTimeout:
   log.info("Error With URL "+httpurl);
   return False;
  except httpx.ConnectError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status_code;
  httpcodereason = geturls_text.reason_phrase;
  httpversionout = geturls_text.http_version;
  httpmethodout = httpmethod;
  httpurlout = str(geturls_text.url);
  httpheaderout = geturls_text.headers;
  httpheadersentout = geturls_text.request.headers;
 elif(httplibuse=="httpx2"):
  try:
   if(httpmethod=="GET"):
    httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
    geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
   elif(httpmethod=="POST"):
    httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
    geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
   else:
    httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
    geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
  except httpx.ConnectTimeout:
   log.info("Error With URL "+httpurl);
   return False;
  except httpx.ConnectError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status_code;
  httpcodereason = geturls_text.reason_phrase;
  httpversionout = geturls_text.http_version;
  httpmethodout = httpmethod;
  httpurlout = str(geturls_text.url);
  httpheaderout = geturls_text.headers;
  httpheadersentout = geturls_text.request.headers;
 elif(httplibuse=="httpcore"):
  try:
   if(httpmethod=="GET"):
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
    geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
   elif(httpmethod=="POST"):
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
    geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
   else:
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
    geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
  except httpcore.ConnectTimeout:
   log.info("Error With URL "+httpurl);
   return False;
  except httpcore.ConnectError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status;
  httpcodereason = http_status_to_reason(geturls_text.status);
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = str(httpurl);
  httpheaderout = geturls_text.headers;
  httpheadersentout = httpheaders;
 elif(httplibuse=="httpcore2"):
  try:
   if(httpmethod=="GET"):
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
    geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
   elif(httpmethod=="POST"):
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
    geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
   else:
    httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
    geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
  except httpcore.ConnectTimeout:
   log.info("Error With URL "+httpurl);
   return False;
  except httpcore.ConnectError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.status;
  httpcodereason = http_status_to_reason(geturls_text.status);
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = str(httpurl);
  httpheaderout = geturls_text.headers;
  httpheadersentout = httpheaders;
 elif(httplibuse=="mechanize"):
  geturls_opener = mechanize.Browser();
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  time.sleep(sleep);
  geturls_opener.addheaders = httpheaders;
  geturls_opener.set_cookiejar(httpcookie);
  geturls_opener.set_handle_robots(False);
  if(postdata is not None and not isinstance(postdata, dict)):
   postdata = urlencode(postdata);
  try:
   if(httpmethod=="GET"):
    geturls_text = geturls_opener.open(httpurl);
   elif(httpmethod=="POST"):
    geturls_text = geturls_opener.open(httpurl, data=postdata);
   else:
    geturls_text = geturls_opener.open(httpurl);
  except mechanize.HTTPError as geturls_text_error:
   geturls_text = geturls_text_error;
   log.info("Error With URL "+httpurl);
  except URLError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.code;
  httpcodereason = http_status_to_reason(geturls_text.code);
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.info();
  reqhead = geturls_opener.request;
  httpheadersentout = reqhead.header_items();
 elif(httplibuse=="pycurl"):
  retrieved_body = BytesIO();
  retrieved_headers = BytesIO();
  try:
   if(httpmethod=="GET"):
    geturls_text = pycurl.Curl();
    geturls_text.setopt(geturls_text.URL, httpurl);
    geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
    geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
    geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
    geturls_text.perform();
   elif(httpmethod=="POST"):
    geturls_text = pycurl.Curl();
    geturls_text.setopt(geturls_text.URL, httpurl);
    geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
    geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
    geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
    geturls_text.perform();
   else:
    geturls_text = pycurl.Curl();
    geturls_text.setopt(geturls_text.URL, httpurl);
    geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
    geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
    geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
    geturls_text.perform();
   retrieved_headers.seek(0);
   pycurlhead = retrieved_headers.read().decode('UTF-8');
   pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0];
   pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
   retrieved_body.seek(0);
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.gaierror:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
  httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
  httpversionout = pyhttpverinfo[0];
  httpmethodout = httpmethod;
  httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
  httpheaderout = pycurlheadersout;
  httpheadersentout = httpheaders;
 elif(httplibuse=="ftp"):
  geturls_text = download_file_from_ftp_file(httpurl);
  if(not geturls_text):
   return False;
  log.info("Downloading URL "+httpurl);
  returnval_content = geturls_text.read()[:];
  returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
  geturls_text.close();
  return returnval;
 elif(httplibuse=="sftp"):
  geturls_text = download_file_from_sftp_file(httpurl);
  if(not geturls_text):
   return False;
  log.info("Downloading URL "+httpurl);
  returnval_content = geturls_text.read()[:];
  returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
  geturls_text.close();
  return returnval;
 elif(httplibuse=="pysftp"):
  geturls_text = download_file_from_pysftp_file(httpurl);
  if(not geturls_text):
   return False;
  log.info("Downloading URL "+httpurl);
  returnval_content = geturls_text.read()[:];
  returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
  geturls_text.close();
  return returnval;
 else:
  returnval = False;
 if(isinstance(httpheaderout, list) and httplibuse!="pycurl"):
  httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
 if(isinstance(httpheaderout, list) and httplibuse=="pycurl"):
  httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
 if(sys.version[0]=="2"):
  try:
   prehttpheaderout = httpheaderout;
   httpheaderkeys = httpheaderout.keys();
   imax = len(httpheaderkeys);
   ic = 0;
   httpheaderout = {};
   while(ic < imax):
    httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
    ic += 1;
  except AttributeError:
   pass;
 httpheaderout = fix_header_names(httpheaderout);
 if(isinstance(httpheadersentout, list) and httplibuse!="pycurl"):
  httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
 if(isinstance(httpheadersentout, list) and httplibuse=="pycurl"):
  httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
 httpheadersentout = fix_header_names(httpheadersentout);
 log.info("Downloading URL "+httpurl);
 if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2"):
  if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
   if(sys.version[0]=="2"):
    strbuf = StringIO(geturls_text.read());
   if(sys.version[0]>="3"):
    strbuf = BytesIO(geturls_text.read());
   gzstrbuf = gzip.GzipFile(fileobj=strbuf);
   returnval_content = gzstrbuf.read()[:];
  if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
   returnval_content = geturls_text.read()[:];
  if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
   returnval_content = geturls_text.read()[:];
   returnval_content = brotli.decompress(returnval_content);
  geturls_text.close();
 elif(httplibuse=="requests"):
  log.info("Downloading URL "+httpurl);
  if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
   if(sys.version[0]=="2"):
    strbuf = StringIO(geturls_text.raw.read());
   if(sys.version[0]>="3"):
    strbuf = BytesIO(geturls_text.raw.read());
   gzstrbuf = gzip.GzipFile(fileobj=strbuf);
   returnval_content = gzstrbuf.read()[:];
  if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
   returnval_content = geturls_text.raw.read()[:];
  if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
   returnval_content = geturls_text.raw.read()[:];
   returnval_content = brotli.decompress(returnval_content);
  geturls_text.close();
 elif(httplibuse=="pycurl"):
  log.info("Downloading URL "+httpurl);
  if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
   if(sys.version[0]=="2"):
    strbuf = StringIO(retrieved_body.read());
   if(sys.version[0]>="3"):
    strbuf = BytesIO(retrieved_body.read());
   gzstrbuf = gzip.GzipFile(fileobj=strbuf);
   returnval_content = gzstrbuf.read()[:];
  if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
   returnval_content = retrieved_body.read()[:];
  if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
   returnval_content = retrieved_body.read()[:];
   returnval_content = brotli.decompress(returnval_content);
  geturls_text.close();
 elif(httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
  pass;
 else:
  returnval = False;
 returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
 return returnval;
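
# Illustrative example (assumes network access and a made-up URL):
# download_from_url fetches a page into memory and returns a dict with
# the content, response headers, HTTP version, final URL, and status.
#   result = download_from_url("http://example.com/", httplibuse="urllib");
#   result['Code'] holds the status code (e.g. 200) and result['Content']
#   holds the decoded body bytes.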
def download_from_url_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", buffersize=524288, sleep=-1):
 global geturls_download_sleep, tmpfileprefix, tmpfilesuffix, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
 exec_time_start = time.time();
 myhash = hashlib.new("sha1");
 if(sys.version[0]=="2"):
  myhash.update(httpurl);
  myhash.update(str(buffersize));
  myhash.update(str(exec_time_start));
 if(sys.version[0]>="3"):
  myhash.update(httpurl.encode('utf-8'));
  myhash.update(str(buffersize).encode('utf-8'));
  myhash.update(str(exec_time_start).encode('utf-8'));
 newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
 if(sleep<0):
  sleep = geturls_download_sleep;
 if(httplibuse=="urllib1" or httplibuse=="urllib2"):
  httplibuse = "urllib";
 if(httplibuse=="httplib1"):
  httplibuse = "httplib";
 if(not haverequests and httplibuse=="requests"):
  httplibuse = "urllib";
 if(not havehttpx and httplibuse=="httpx"):
  httplibuse = "urllib";
 if(not havehttpx and httplibuse=="httpx2"):
  httplibuse = "urllib";
 if(not havehttpcore and httplibuse=="httpcore"):
  httplibuse = "urllib";
 if(not havehttpcore and httplibuse=="httpcore2"):
  httplibuse = "urllib";
 if(not havemechanize and httplibuse=="mechanize"):
  httplibuse = "urllib";
 if(not havepycurl and httplibuse=="pycurl"):
  httplibuse = "urllib";
 if(not havehttplib2 and httplibuse=="httplib2"):
  httplibuse = "httplib";
 if(not haveparamiko and httplibuse=="sftp"):
  httplibuse = "ftp";
 if(not havepysftp and httplibuse=="pysftp"):
  httplibuse = "ftp";
 urlparts = urlparse.urlparse(httpurl);
 if(isinstance(httpheaders, list)):
  httpheaders = make_http_headers_from_list_to_dict(httpheaders);
 httpheaders = fix_header_names(httpheaders);
 if(httpuseragent is not None):
  if('User-Agent' in httpheaders):
   httpheaders['User-Agent'] = httpuseragent;
  else:
   httpheaders.update({'User-Agent': httpuseragent});
 if(httpreferer is not None):
  if('Referer' in httpheaders):
   httpheaders['Referer'] = httpreferer;
  else:
   httpheaders.update({'Referer': httpreferer});
 if(urlparts.username is not None or urlparts.password is not None):
  inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
  httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
 geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
 if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="mechanize"):
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
 if(httplibuse=="pycurl"):
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
 geturls_opener.addheaders = httpheaders;
 time.sleep(sleep);
 if(httplibuse=="urllib"):
  try:
   if(httpmethod=="GET"):
    geturls_text = geturls_opener.open(httpurl);
   elif(httpmethod=="POST"):
    geturls_text = geturls_opener.open(httpurl, data=postdata);
   else:
    geturls_text = geturls_opener.open(httpurl);
  except HTTPError as geturls_text_error:
   geturls_text = geturls_text_error;
   log.info("Error With URL "+httpurl);
  except URLError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.getcode();
  httpcodereason = geturls_text.reason;
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.info();
  httpheadersentout = httpheaders;
 elif(httplibuse=="request"):
  try:
   if(httpmethod=="GET"):
    geturls_request = Request(httpurl, headers=httpheaders);
    geturls_text = urlopen(geturls_request);
   elif(httpmethod=="POST"):
    geturls_request = Request(httpurl, headers=httpheaders);
    geturls_text = urlopen(geturls_request, data=postdata);
   else:
    geturls_request = Request(httpurl, headers=httpheaders);
    geturls_text = urlopen(geturls_request);
  except HTTPError as geturls_text_error:
   geturls_text = geturls_text_error;
   log.info("Error With URL "+httpurl);
  except URLError:
   log.info("Error With URL "+httpurl);
   return False;
  except socket.timeout:
   log.info("Error With URL "+httpurl);
   return False;
  httpcodeout = geturls_text.getcode();
  httpcodereason = geturls_text.reason;
  httpversionout = "1.1";
  httpmethodout = httpmethod;
  httpurlout = geturls_text.geturl();
  httpheaderout = geturls_text.headers;
  httpheadersentout = httpheaders;
=="request3"):
1295 urllib_pool
= urllib3
.PoolManager(headers
=httpheaders
);
1297 if(httpmethod
=="GET"):
1298 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
1299 elif(httpmethod
=="POST"):
1300 geturls_text
= geturls_text
= urllib_pool
.request("POST", httpurl
, body
=postdata
, headers
=httpheaders
, preload_content
=False);
1302 geturls_text
= geturls_text
= urllib_pool
.request("GET", httpurl
, headers
=httpheaders
, preload_content
=False);
1303 except urllib3
.exceptions
.ConnectTimeoutError
:
1304 log
.info("Error With URL "+httpurl
);
1306 except urllib3
.exceptions
.ConnectError
:
1307 log
.info("Error With URL "+httpurl
);
1309 except urllib3
.exceptions
.MaxRetryError
:
1310 log
.info("Error With URL "+httpurl
);
1312 except socket
.timeout
:
1313 log
.info("Error With URL "+httpurl
);
1315 httpcodeout
= geturls_text
.status
;
1316 httpcodereason
= geturls_text
.reason
;
1317 if(geturls_text
.version
=="10"):
1318 httpversionout
= "1.0";
1320 httpversionout
= "1.1";
1321 httpmethodout
= httpmethod
;
1322 httpurlout
= geturls_text
.geturl();
1323 httpheaderout
= geturls_text
.info();
1324 httpheadersentout
= httpheaders
;
1325 elif(httplibuse
=="httplib"):
1326 if(urlparts
[0]=="http"):
1327 httpconn
= HTTPConnection(urlparts
[1]);
1328 elif(urlparts
[0]=="https"):
1329 httpconn
= HTTPSConnection(urlparts
[1]);
1332 if(postdata
is not None and not isinstance(postdata
, dict)):
1333 postdata
= urlencode(postdata
);
1335 if(httpmethod
=="GET"):
1336 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1337 elif(httpmethod
=="POST"):
1338 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
1340 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1341 except socket
.timeout
:
1342 log
.info("Error With URL "+httpurl
);
1344 except socket
.gaierror
:
1345 log
.info("Error With URL "+httpurl
);
1347 geturls_text
= httpconn
.getresponse();
1348 httpcodeout
= geturls_text
.status
;
1349 httpcodereason
= geturls_text
.reason
;
1350 if(geturls_text
.version
=="10"):
1351 httpversionout
= "1.0";
1353 httpversionout
= "1.1";
1354 httpmethodout
= httpmethod
;
1355 httpurlout
= httpurl
;
1356 httpheaderout
= geturls_text
.getheaders();
1357 httpheadersentout
= httpheaders
;
    elif(httplibuse=="httplib2"):
        if(urlparts[0]=="http"):
            httpconn = HTTPConnectionWithTimeout(urlparts[1]);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
        else:
            return False;
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = httpurl;
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
    elif(httplibuse=="urllib3"):
        urllib_pool = urllib3.PoolManager(headers=httpheaders);
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.NewConnectionError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="requests"):
        try:
            reqsession = requests.Session();
            if(httpmethod=="GET"):
                geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
            elif(httpmethod=="POST"):
                geturls_text = reqsession.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
            else:
                geturls_text = reqsession.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True);
        except requests.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except requests.exceptions.ConnectionError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason;
        if(geturls_text.raw.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
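
    # requests does not expose the HTTP version directly; it is read from the
    # underlying urllib3 response above (geturls_text.raw.version is the
    # integer 10 or 11 for HTTP/1.0 and HTTP/1.1).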
    elif(httplibuse=="httpx"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason_phrase;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
    elif(httplibuse=="httpx2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason_phrase;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
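
    # The only difference between the "httpx" and "httpx2" branches is the
    # http2=True flag on httpx.Client, which lets the client negotiate
    # HTTP/2 with servers that support it.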
    elif(httplibuse=="httpcore"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                # httpcore takes the request body through the "content" keyword
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpcore2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
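
    # httpcore is the low-level transport underneath httpx: it takes raw
    # headers and returns a plain response object, so cookies are not handled
    # in these two branches and the reason phrase is reconstructed from the
    # status code with http_status_to_reason().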
    elif(httplibuse=="mechanize"):
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpcodereason = http_status_to_reason(geturls_text.code);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        reqhead = geturls_opener.request;
        httpheadersentout = reqhead.header_items();
    elif(httplibuse=="pycurl"):
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.perform();
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                # send the POST body via libcurl's POSTFIELDS option
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
                geturls_text.perform();
            else:
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.perform();
            retrieved_headers.seek(0);
            pycurlhead = retrieved_headers.read().decode('UTF-8');
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
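
    # libcurl hands back the raw status line and header block through
    # HEADERFUNCTION; the code above parses the HTTP version out of the first
    # line with a regular expression and converts the rest into a dict with
    # make_http_headers_from_pycurl_to_dict().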
    elif(httplibuse=="ftp"):
        geturls_text = download_file_from_ftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    elif(httplibuse=="sftp"):
        geturls_text = download_file_from_sftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
    elif(httplibuse=="pysftp"):
        geturls_text = download_file_from_pysftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    else:
        return False;
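
    # The ftp/sftp/pysftp branches return a BytesIO object holding the whole
    # transfer, so the download size is measured by seeking to the end of the
    # buffer and reading the offset back with tell().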
    if(isinstance(httpheaderout, list) and httplibuse!="pycurl"):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheaderout, list) and httplibuse=="pycurl"):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list) and httplibuse!="pycurl"):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    if(isinstance(httpheadersentout, list) and httplibuse=="pycurl"):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="requests" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2"):
        downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
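
    # When the server sends no Content-Length, downloadsize stays 0 and the
    # progress loops below skip the percentage calculation.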
    log.info("Downloading URL "+httpurl);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2" or httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    elif(httplibuse=="requests"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    elif(httplibuse=="pycurl"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
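
# A minimal usage sketch for the function above (hypothetical URL; keyword
# names are assumed to match the positional call sites used elsewhere in
# this file):
#   fileinfo = download_from_url_file("http://www.example.com/file.bin", httplibuse="urllib", buffersize=524288, sleep=0);
#   if(fileinfo):
#       print(fileinfo['Filename'], fileinfo['Code'], fileinfo['Reason']);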
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-" and sys.version[0]=="2"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = StringIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-" and sys.version[0]>="3"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, buffersize[0], sleep);
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", sleep);
    return returnval;

def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", sleep);
    return returnval;

def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", sleep);
    return returnval;

def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", sleep);
    return returnval;

def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", sleep);
    return returnval;

def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", sleep);
    return returnval;

def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", sleep);
    return returnval;

def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", sleep);
    return returnval;

def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", sleep);
    return returnval;

def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", sleep);
    return returnval;

def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", sleep);
    return returnval;

def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", sleep);
    return returnval;

def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pycurl", sleep);
    return returnval;

def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", sleep);
    return returnval;

def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", sleep);
    return returnval;

def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", sleep);
    return returnval;
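
# The *_with_<library> helpers above are thin wrappers that pin the
# httplibuse argument; they exist so callers can pick a backend without
# remembering the positional argument order of download_from_url().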
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", buffersize, sleep);
    return returnval;

def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", buffersize, sleep);
    return returnval;

def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", buffersize, sleep);
    return returnval;

def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", buffersize, sleep);
    return returnval;

def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", buffersize, sleep);
    return returnval;

def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", buffersize, sleep);
    return returnval;

def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", buffersize, sleep);
    return returnval;

def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pycurl", buffersize, sleep);
    return returnval;

def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", buffersize, sleep);
    return returnval;

def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", buffersize, sleep);
    return returnval;

def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", buffersize, sleep);
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pycurl", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", outfile, outpath, buffersize, sleep);
    return returnval;

def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", outfile, outpath, buffersize, sleep);
    return returnval;
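
# Same wrapper pattern as above, but for download_from_url_to_file(), which
# additionally takes outfile/outpath and a two-element buffersize list
# (download buffer, copy buffer).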
def download_file_from_ftp_file(url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
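
# Example FTP URL accepted by these helpers (hypothetical host; the
# credentials and port are parsed out of the URL by urlparse, with
# anonymous login as the fallback):
#   fileobj = download_file_from_ftp_file("ftp://user:password@ftp.example.com:21/pub/file.bin");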
def download_file_from_ftp_string(url):
    ftpfile = download_file_from_ftp_file(url);
    return ftpfile.read();
def upload_file_to_ftp_file(ftpfile, url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    ftpfileo.close();
    return ftpfile;
if(haveparamiko):
    def download_file_from_sftp_file(url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        ssh = paramiko.SSHClient();
        ssh.load_system_host_keys();
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
        try:
            ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp = ssh.open_sftp();
        sftpfile = BytesIO();
        sftp.getfo(urlparts.path, sftpfile);
        sftp.close();
        ssh.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def download_file_from_sftp_file(url):
        return False;

if(haveparamiko):
    def download_file_from_sftp_string(url):
        sftpfile = download_file_from_sftp_file(url);
        return sftpfile.read();
else:
    def download_file_from_sftp_string(url):
        return False;
if(haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        ssh = paramiko.SSHClient();
        ssh.load_system_host_keys();
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
        try:
            ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp = ssh.open_sftp();
        sftp.putfo(sftpfile, urlparts.path);
        sftp.close();
        ssh.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def upload_file_to_sftp_file(sftpfile, url):
        return False;

if(haveparamiko):
    def upload_file_to_sftp_string(sftpstring, url):
        sftpfileo = BytesIO(sftpstring);
        sftpfile = upload_file_to_sftp_file(sftpfileo, url);
        sftpfileo.close();
        return sftpfile;
else:
    def upload_file_to_sftp_string(sftpstring, url):
        return False;
if(havepysftp):
    def download_file_from_pysftp_file(url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        try:
            sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftpfile = BytesIO();
        sftp.getfo(urlparts.path, sftpfile);
        sftp.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def download_file_from_pysftp_file(url):
        return False;

if(havepysftp):
    def download_file_from_pysftp_string(url):
        sftpfile = download_file_from_pysftp_file(url);
        return sftpfile.read();
else:
    def download_file_from_pysftp_string(url):
        return False;
if(havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        try:
            sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp.putfo(sftpfile, urlparts.path);
        sftp.close();
        sftpfile.seek(0, 0);
        return sftpfile;
else:
    def upload_file_to_pysftp_file(sftpfile, url):
        return False;

if(havepysftp):
    def upload_file_to_pysftp_string(sftpstring, url):
        sftpfileo = BytesIO(sftpstring);
        sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
        sftpfileo.close();
        return sftpfile;
else:
    def upload_file_to_pysftp_string(sftpstring, url):
        return False;