'''
This program is free software; you can redistribute it and/or modify
it under the terms of the Revised BSD License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
Revised BSD License for more details.

Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski

$FileInfo: pywwwget.py - Last Update: 9/30/2023 Ver. 1.7.0 RC 1 - Author: cooldude2k $
'''
from __future__ import division, absolute_import, print_function;
import re, os, sys, hashlib, shutil, platform, tempfile, urllib, gzip, time, argparse, cgi, subprocess, socket, email.utils, datetime;
# The helpers below call the module both as "logging" and as "log", so bind both names.
import logging;
import logging as log;
from ftplib import FTP, FTP_TLS;
from base64 import b64encode;
# Optional-dependency detection for mechanize; only the two "False" assignments
# survive in this excerpt, so the guarded import is reconstructed from the
# surrounding pattern.
havemechanize = False;
try:
    import mechanize;
    havemechanize = True;
except ImportError:
    havemechanize = False;
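# The guarded imports for the remaining optional backends (requests, urllib3,
# httpx, httpcore, pycurl, paramiko, pysftp, brotli) are elided in this
# excerpt, but the feature flags they set are used throughout the code below.
# A minimal sketch of the same detection pattern (an assumption, not the
# original lines):
import importlib;
for _modname, _flagname in [("requests", "haverequests"), ("urllib3", "haveurllib3"),
                            ("httpx", "havehttpx"), ("httpcore", "havehttpcore"),
                            ("pycurl", "havepycurl"), ("paramiko", "haveparamiko"),
                            ("pysftp", "havepysftp"), ("brotli", "havebrotli")]:
    try:
        # bind the module under its usual name and record that it is available
        globals()[_modname] = importlib.import_module(_modname);
        globals()[_flagname] = True;
    except ImportError:
        globals()[_flagname] = False;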
try:
    from httplib2 import HTTPConnectionWithTimeout, HTTPSConnectionWithTimeout;
    havehttplib2 = True;
except ImportError:
    havehttplib2 = False;
if(sys.version[0]=="2"):
    try:
        from io import StringIO, BytesIO;
    except ImportError:
        try:
            from cStringIO import StringIO;
            from cStringIO import StringIO as BytesIO;
        except ImportError:
            from StringIO import StringIO;
            from StringIO import StringIO as BytesIO;
    # From http://python-future.org/compatible_idioms.html
    from urlparse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin;
    from urllib import urlencode;
    from urllib import urlopen as urlopenalt;
    from urllib2 import urlopen, Request, install_opener, HTTPError, URLError, build_opener, HTTPCookieProcessor;
    import urlparse, cookielib;
    from httplib import HTTPConnection, HTTPSConnection;
if(sys.version[0]>="3"):
    from io import StringIO, BytesIO;
    # From http://python-future.org/compatible_idioms.html
    from urllib.parse import urlparse, urlunparse, urlsplit, urlunsplit, urljoin, urlencode;
    from urllib.request import urlopen, Request, install_opener, build_opener, HTTPCookieProcessor;
    from urllib.error import HTTPError, URLError;
    import urllib.parse as urlparse;
    import http.cookiejar as cookielib;
    from http.client import HTTPConnection, HTTPSConnection;
__program_name__ = "PyWWW-Get";
__program_alt_name__ = "PyWWWGet";
__program_small_name__ = "wwwget";
__project__ = __program_name__;
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get";
__version_info__ = (1, 7, 0, "RC 1", 1);
__version_date_info__ = (2023, 9, 30, "RC 1", 1);
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2);
__revision__ = __version_info__[3];
__revision_id__ = "$Id$";
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4]);
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__;
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3]);
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2]);
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
# tmpfilesuffix is referenced by download_from_url_file() below; its assignment
# is elided in this excerpt, so a plain separator is assumed here.
tmpfilesuffix = "-";
pytempdir = tempfile.gettempdir();
# platform.architecture() returns a tuple like ("64bit", "ELF"); the first
# element is what the comparisons below expect.
PyBitness = platform.architecture()[0];
if(PyBitness=="32bit" or PyBitness=="32"):
    PyBitness = "32";
elif(PyBitness=="64bit" or PyBitness=="64"):
    PyBitness = "64";
else:
    PyBitness = "32";
compression_supported = "gzip, deflate";
if(havebrotli):
    compression_supported = "gzip, deflate, br";
else:
    compression_supported = "gzip, deflate";
geturls_cj = cookielib.CookieJar();
windowsNT4_ua_string = "Windows NT 4.0";
# The second 'SEC-CH-UA-PLATFORM' key in each of these dicts silently
# overwrote the first; the version value belongs under 'SEC-CH-UA-PLATFORM-VERSION'.
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "32", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-BITNESS': "64", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
if(platform.python_implementation()==""):
    py_implementation = "Python";
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
geturls_ua = geturls_ua_firefox_windows7;
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# As with the *_ua_addon dicts above, the duplicated 'SEC-CH-UA-PLATFORM' key
# is split into 'SEC-CH-UA-PLATFORM' and 'SEC-CH-UA-PLATFORM-VERSION'.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__), 'SEC-CH-UA-BITNESS': str(PyBitness)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
def verbose_printout(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    if(outtype=="print" and dbgenable):
        print(dbgtxt);
        return True;
    elif(outtype=="log" and dbgenable):
        logging.info(dbgtxt);
        return True;
    elif(outtype=="warning" and dbgenable):
        logging.warning(dbgtxt);
        return True;
    elif(outtype=="error" and dbgenable):
        logging.error(dbgtxt);
        return True;
    elif(outtype=="critical" and dbgenable):
        logging.critical(dbgtxt);
        return True;
    elif(outtype=="exception" and dbgenable):
        logging.exception(dbgtxt);
        return True;
    elif(outtype=="logalt" and dbgenable):
        logging.log(dgblevel, dbgtxt);
        return True;
    elif(outtype=="debug" and dbgenable):
        logging.debug(dbgtxt);
        return True;
    elif(not dbgenable):
        return True;
    else:
        return False;
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    dbgout = verbose_printout(dbgtxt, outtype, dbgenable, dgblevel);
    if(not dbgout):
        return False;
    return dbgtxt;
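# Illustrative usage (not part of the original file): route a message to the
# standard logging module at the chosen severity.
#   verbose_printout("starting download", outtype="log");       # logging.info
#   verbose_printout("slow response", outtype="warning");       # logging.warning
#   verbose_printout("custom level", outtype="logalt", dgblevel=25);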
def add_url_param(url, **params):
    n = 3;  # index of the query component in urlsplit() results
    parts = list(urlparse.urlsplit(url));
    d = dict(cgi.parse_qsl(parts[n]));  # use cgi.parse_qs for list values
    d.update(params);
    parts[n] = urlencode(d);
    return urlparse.urlunsplit(parts);
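# Illustrative usage (not part of the original file); key order in the rebuilt
# query string follows dict insertion order on Python 3.7+:
#   add_url_param("http://example.com/page?a=1", b="2")
#   ->  "http://example.com/page?a=1&b=2"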
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    # split on os.pathsep and join with os.path.join so this also works on Windows
    for path in os.environ["PATH"].split(os.pathsep):
        if os.path.exists(os.path.join(path, execfile)):
            return os.path.join(path, execfile);
def listize(varlist):
    il = 0;
    ilx = 1;
    newlistreg = {};
    newlistrev = {};
    while(il < len(varlist)):
        newlistreg.update({ilx: varlist[il]});
        newlistrev.update({varlist[il]: ilx});
        ilx = ilx + 1;
        il = il + 1;
    newlistfull = {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev};
    return newlistfull;
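# Illustrative usage (not part of the original file): listize builds 1-based
# forward and reverse lookup tables for a list.
#   lt = listize(["alpha", "beta"]);
#   lt['reg'][1]       ->  "alpha"
#   lt['rev']["beta"]  ->  2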
def twolistize(varlist):
    il = 0;
    ilx = 1;
    newlistnamereg = {};
    newlistnamerev = {};
    newlistdescreg = {};
    newlistdescrev = {};
    while(il < len(varlist)):
        newlistnamereg.update({ilx: varlist[il][0].strip()});
        newlistnamerev.update({varlist[il][0].strip(): ilx});
        newlistdescreg.update({ilx: varlist[il][1].strip()});
        newlistdescrev.update({varlist[il][1].strip(): ilx});
        ilx = ilx + 1;
        il = il + 1;
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev};
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev};
    newlistfull = {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp};
    return newlistfull;
def arglistize(proexec, *varlist):
    il = 0;
    newarglist = [proexec];
    while(il < len(varlist)):
        if varlist[il][0] is not None:
            newarglist.append(varlist[il][0]);
        if varlist[il][1] is not None:
            newarglist.append(varlist[il][1]);
        il = il + 1;
    return newarglist;
def fix_header_names(header_dict):
    if(sys.version[0]=="2"):
        header_dict = {k.title(): v for k, v in header_dict.iteritems()};
    if(sys.version[0]>="3"):
        header_dict = {k.title(): v for k, v in header_dict.items()};
    return header_dict;
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60));
    m = int((sec_elapsed % (60 * 60)) / 60);
    s = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(h, m, s);
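# Illustrative usage (not part of the original file):
#   hms_string(3661.5)  ->  "1:01:01.50"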
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    unit = unit.upper();
    if(unit!="IEC" and unit!="SI"):
        unit = "IEC";
    if(unit=="IEC"):
        units = [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
        unitswos = ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
        unitsize = 1024.0;
    if(unit=="SI"):
        units = [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
        unitswos = ["B","kB","MB","GB","TB","PB","EB","ZB"];
        unitsize = 1000.0;
    return_val = {};
    orgbytes = bytes;
    for unit in units:
        if abs(bytes) < unitsize:
            strformat = "%3."+str(precision)+"f%s";
            pre_return_val = (strformat % (bytes, unit));
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
            alt_return_val = pre_return_val.split();
            return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
            return return_val;
        bytes = bytes / unitsize;
    strformat = "%."+str(precision)+"f%s";
    pre_return_val = (strformat % (bytes, "YiB"));
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val);
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val);
    alt_return_val = pre_return_val.split();
    return_val = {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]};
    return return_val;
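# Illustrative usage (not part of the original file); the two re.sub passes
# strip a trailing ".0" so whole values print without a fraction:
#   get_readable_size(2048)['ReadableWithSuffix']             ->  "2 KiB"
#   get_readable_size(2500, unit="SI")['ReadableWithSuffix']  ->  "2.5 kB"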
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    usehashtypes = usehashtypes.lower();
    getfilesize = os.path.getsize(infile);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes):
        hashtypelist = usehashtypes.split(",");
        openfile = open(infile, "rb");
        filecontents = openfile.read();
        openfile.close();
        listnumcount = 0;
        listnumend = len(hashtypelist);
        while(listnumcount < listnumend):
            hashtypelistlow = hashtypelist[listnumcount].strip();
            hashtypelistup = hashtypelistlow.upper();
            filehash = hashlib.new(hashtypelistup);
            filehash.update(filecontents);
            filegethash = filehash.hexdigest();
            return_val.update({hashtypelistup: filegethash});
            listnumcount += 1;
    return return_val;
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    usehashtypes = usehashtypes.lower();
    getfilesize = len(instring);
    return_val = get_readable_size(getfilesize, precision, unit);
    if(usehashes):
        hashtypelist = usehashtypes.split(",");
        listnumcount = 0;
        listnumend = len(hashtypelist);
        while(listnumcount < listnumend):
            hashtypelistlow = hashtypelist[listnumcount].strip();
            hashtypelistup = hashtypelistlow.upper();
            filehash = hashlib.new(hashtypelistup);
            if(sys.version[0]=="2"):
                filehash.update(instring);
            if(sys.version[0]>="3"):
                filehash.update(instring.encode('utf-8'));
            filegethash = filehash.hexdigest();
            return_val.update({hashtypelistup: filegethash});
            listnumcount += 1;
    return return_val;
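# Illustrative usage (not part of the original file): size info plus hash
# digests keyed by upper-cased algorithm name.
#   get_readable_size_from_string("abc", usehashes=True, usehashtypes="md5")['MD5']
#   ->  "900150983cd24fb0d6963f7d28e17f72"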
def http_status_to_reason(code):
    reasons = {
        100: 'Continue',
        101: 'Switching Protocols',
        102: 'Processing',
        200: 'OK',
        201: 'Created',
        202: 'Accepted',
        203: 'Non-Authoritative Information',
        204: 'No Content',
        205: 'Reset Content',
        206: 'Partial Content',
        207: 'Multi-Status',
        208: 'Already Reported',
        226: 'IM Used',
        300: 'Multiple Choices',
        301: 'Moved Permanently',
        302: 'Found',
        303: 'See Other',
        304: 'Not Modified',
        305: 'Use Proxy',
        307: 'Temporary Redirect',
        308: 'Permanent Redirect',
        400: 'Bad Request',
        401: 'Unauthorized',
        402: 'Payment Required',
        403: 'Forbidden',
        404: 'Not Found',
        405: 'Method Not Allowed',
        406: 'Not Acceptable',
        407: 'Proxy Authentication Required',
        408: 'Request Timeout',
        409: 'Conflict',
        410: 'Gone',
        411: 'Length Required',
        412: 'Precondition Failed',
        413: 'Payload Too Large',
        414: 'URI Too Long',
        415: 'Unsupported Media Type',
        416: 'Range Not Satisfiable',
        417: 'Expectation Failed',
        421: 'Misdirected Request',
        422: 'Unprocessable Entity',
        423: 'Locked',
        424: 'Failed Dependency',
        426: 'Upgrade Required',
        428: 'Precondition Required',
        429: 'Too Many Requests',
        431: 'Request Header Fields Too Large',
        451: 'Unavailable For Legal Reasons',
        500: 'Internal Server Error',
        501: 'Not Implemented',
        502: 'Bad Gateway',
        503: 'Service Unavailable',
        504: 'Gateway Timeout',
        505: 'HTTP Version Not Supported',
        506: 'Variant Also Negotiates',
        507: 'Insufficient Storage',
        508: 'Loop Detected',
        510: 'Not Extended',
        511: 'Network Authentication Required'
    };
    return reasons.get(code, 'Unknown Status Code');
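# Illustrative usage (not part of the original file):
#   http_status_to_reason(404)  ->  "Not Found"
#   http_status_to_reason(999)  ->  "Unknown Status Code"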
def ftp_status_to_reason(code):
    reasons = {
        110: 'Restart marker reply',
        120: 'Service ready in nnn minutes',
        125: 'Data connection already open; transfer starting',
        150: 'File status okay; about to open data connection',
        200: 'Command okay',
        202: 'Command not implemented, superfluous at this site',
        211: 'System status, or system help reply',
        212: 'Directory status',
        213: 'File status',
        214: 'Help message',
        215: 'NAME system type',
        220: 'Service ready for new user',
        221: 'Service closing control connection',
        225: 'Data connection open; no transfer in progress',
        226: 'Closing data connection',
        227: 'Entering Passive Mode',
        230: 'User logged in, proceed',
        250: 'Requested file action okay, completed',
        257: '"PATHNAME" created',
        331: 'User name okay, need password',
        332: 'Need account for login',
        350: 'Requested file action pending further information',
        421: 'Service not available, closing control connection',
        425: 'Can\'t open data connection',
        426: 'Connection closed; transfer aborted',
        450: 'Requested file action not taken',
        451: 'Requested action aborted. Local error in processing',
        452: 'Requested action not taken. Insufficient storage space in system',
        500: 'Syntax error, command unrecognized',
        501: 'Syntax error in parameters or arguments',
        502: 'Command not implemented',
        503: 'Bad sequence of commands',
        504: 'Command not implemented for that parameter',
        530: 'Not logged in',
        532: 'Need account for storing files',
        550: 'Requested action not taken. File unavailable',
        551: 'Requested action aborted. Page type unknown',
        552: 'Requested file action aborted. Exceeded storage allocation',
        553: 'Requested action not taken. File name not allowed'
    };
    return reasons.get(code, 'Unknown Status Code');
def sftp_status_to_reason(code):
    reasons = {
        0: 'SSH_FX_OK',
        1: 'SSH_FX_EOF',
        2: 'SSH_FX_NO_SUCH_FILE',
        3: 'SSH_FX_PERMISSION_DENIED',
        4: 'SSH_FX_FAILURE',
        5: 'SSH_FX_BAD_MESSAGE',
        6: 'SSH_FX_NO_CONNECTION',
        7: 'SSH_FX_CONNECTION_LOST',
        8: 'SSH_FX_OP_UNSUPPORTED'
    };
    return reasons.get(code, 'Unknown Status Code');
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    if isinstance(headers, dict):
        returnval = [];
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append((headkey, headvalue));
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append((headkey, headvalue));
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    if isinstance(headers, dict):
        returnval = [];
        if(sys.version[0]=="2"):
            for headkey, headvalue in headers.iteritems():
                returnval.append(headkey+": "+headvalue);
        if(sys.version[0]>="3"):
            for headkey, headvalue in headers.items():
                returnval.append(headkey+": "+headvalue);
    elif isinstance(headers, list):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
def make_http_headers_from_pycurl_to_dict(headers):
    header_dict = {};
    headers = headers.strip().split('\r\n');
    for header in headers:
        parts = header.split(': ', 1);
        if(len(parts)==2):
            key, value = parts;
            header_dict[key.title()] = value;
    return header_dict;
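# Illustrative usage (not part of the original file): the status line has no
# ": " separator, so only real header lines survive the split.
#   make_http_headers_from_pycurl_to_dict("HTTP/1.1 200 OK\r\nContent-Type: text/html")
#   ->  {'Content-Type': "text/html"}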
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", compression_supported), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    if isinstance(headers, list):
        returnval = {};
        mli = 0;
        mlil = len(headers);
        while(mli < mlil):
            returnval.update({headers[mli][0]: headers[mli][1]});
            mli = mli + 1;
    elif isinstance(headers, dict):
        returnval = headers;
    else:
        returnval = False;
    return returnval;
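# Illustrative round trip (not part of the original file):
#   make_http_headers_from_list_to_dict([("User-Agent", "test")])  ->  {'User-Agent': "test"}
#   make_http_headers_from_dict_to_list({'User-Agent': "test"})    ->  [("User-Agent", "test")]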
def get_httplib_support(checkvalue=None):
    global haverequests, havemechanize, havehttplib2, haveurllib3, havehttpx, havehttpcore, havepycurl, haveparamiko, havepysftp;
    returnval = [];
    returnval.append("ftp");
    returnval.append("httplib");
    if(havehttplib2):
        returnval.append("httplib2");
    returnval.append("urllib");
    if(haveurllib3):
        returnval.append("urllib3");
        returnval.append("request3");
    returnval.append("request");
    if(haverequests):
        returnval.append("requests");
    if(havehttpx):
        returnval.append("httpx");
        returnval.append("httpx2");
    if(havemechanize):
        returnval.append("mechanize");
    if(havepycurl):
        returnval.append("pycurl");
    if(haveparamiko):
        returnval.append("sftp");
    if(havepysftp):
        returnval.append("pysftp");
    if(not checkvalue is None):
        if(checkvalue=="urllib1" or checkvalue=="urllib2"):
            checkvalue = "urllib";
        if(checkvalue=="httplib1"):
            checkvalue = "httplib";
        if(checkvalue in returnval):
            return True;
        else:
            return False;
    return returnval;
def check_httplib_support(checkvalue="urllib"):
    if(checkvalue=="urllib1" or checkvalue=="urllib2"):
        checkvalue = "urllib";
    if(checkvalue=="httplib1"):
        checkvalue = "httplib";
    returnval = get_httplib_support(checkvalue);
    return returnval;
def get_httplib_support_list():
    returnval = get_httplib_support(None);
    return returnval;
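# Illustrative usage (not part of the original file); the exact list depends
# on which optional backends imported successfully:
#   get_httplib_support_list()       ->  e.g. ["ftp", "httplib", "urllib", "request", ...]
#   check_httplib_support("pycurl")  ->  True only if pycurl is installed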
def download_from_url(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", sleep=-1, timeout=10):
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        httplibuse = "ftp";
    if(not havepysftp and httplibuse=="pysftp"):
        httplibuse = "ftp";
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    httpheaders = fix_header_names(httpheaders);
    if(httpuseragent is not None):
        if('User-Agent' in httpheaders):
            httpheaders['User-Agent'] = httpuseragent;
        else:
            # the original updated httpuseragent here by mistake
            httpheaders.update({'User-Agent': httpuseragent});
    if(httpreferer is not None):
        if('Referer' in httpheaders):
            httpheaders['Referer'] = httpreferer;
        else:
            httpheaders.update({'Referer': httpreferer});
    if(urlparts.username is not None or urlparts.password is not None):
        if(sys.version[0]=="2"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password));
        if(sys.version[0]>="3"):
            inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(httplibuse=="urllib" or httplibuse=="mechanize"):
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    if(httplibuse=="pycurl"):
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_pycurl(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    if(httplibuse=="urllib" or httplibuse=="request"):
        geturls_request = Request(httpurl);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(geturls_request);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(geturls_request, data=postdata);
            else:
                geturls_text = geturls_opener.open(geturls_request);
        except HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getcode();
        try:
            httpcodereason = geturls_text.reason;
        except AttributeError:
            httpcodereason = http_status_to_reason(geturls_text.getcode());
        try:
            httpversionout = geturls_text.version;
        except AttributeError:
            httpversionout = "1.1";
        httpmethodout = geturls_request.get_method();
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="httplib"):
        if(urlparts[0]=="http"):
            httpconn = HTTPConnection(urlparts[1], timeout=timeout);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnection(urlparts[1], timeout=timeout);
        else:
            return False;
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                # the original passed "GET" here even for POST requests
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except BlockingIOError:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = geturls_text._method;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
    elif(httplibuse=="httplib2"):
        if(urlparts[0]=="http"):
            httpconn = HTTPConnectionWithTimeout(urlparts[1], timeout=timeout);
        elif(urlparts[0]=="https"):
            httpconn = HTTPSConnectionWithTimeout(urlparts[1], timeout=timeout);
        else:
            return False;
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                httpconn.request("GET", urlparts[2], headers=httpheaders);
            elif(httpmethod=="POST"):
                httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
            else:
                httpconn.request("GET", urlparts[2], headers=httpheaders);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except BlockingIOError:
            log.info("Error With URL "+httpurl);
            return False;
        geturls_text = httpconn.getresponse();
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = httpurl;
        httpheaderout = geturls_text.getheaders();
        httpheadersentout = httpheaders;
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
        urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        # urllib3 has no "ConnectError"; NewConnectionError is the real class
        except urllib3.exceptions.NewConnectionError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.HTTPError:  # the except type on this clause is elided in the excerpt
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
    elif(httplibuse=="requests"):
        try:
            reqsession = requests.Session();
            if(httpmethod=="GET"):
                geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                geturls_text = reqsession.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except requests.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        # requests has no "ConnectError"; ConnectionError is the real class
        except requests.exceptions.ConnectionError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason;
        if(geturls_text.raw.version=="10"):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
    elif(httplibuse=="httpx"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason_phrase;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
    elif(httplibuse=="httpx2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        # httpx responses expose reason_phrase, not reason
        httpcodereason = geturls_text.reason_phrase;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
    elif(httplibuse=="httpcore"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                # httpcore's request() takes content= for the body; the original passed data=
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpcore2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        # httpcore responses have no .reason attribute; derive it from the status code
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="mechanize"):
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpcodereason = geturls_text.msg;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        reqhead = geturls_opener.request;
        httpheadersentout = reqhead.header_items();
    elif(httplibuse=="pycurl"):
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
                geturls_text.perform();
            else:
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except pycurl.error:  # the except type on this clause is elided in the excerpt; pycurl.error assumed
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = pyhttpverinfo[0];
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
    elif(httplibuse=="ftp"):
        geturls_text = download_file_from_ftp_file(httpurl);
        if(not geturls_text):
            return False;
        log.info("Downloading URL "+httpurl);
        returnval_content = geturls_text.read()[:];
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        geturls_text.close();
        return returnval;
    elif(httplibuse=="sftp"):
        geturls_text = download_file_from_sftp_file(httpurl);
        if(not geturls_text):
            return False;
        log.info("Downloading URL "+httpurl);
        returnval_content = geturls_text.read()[:];
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        geturls_text.close();
        return returnval;
    elif(httplibuse=="pysftp"):
        geturls_text = download_file_from_pysftp_file(httpurl);
        if(not geturls_text):
            return False;
        log.info("Downloading URL "+httpurl);
        returnval_content = geturls_text.read()[:];
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': None, 'Version': None, 'Method': None, 'HeadersSent': None, 'URL': httpurl, 'Code': None};
        geturls_text.close();
        return returnval;
    else:
        return False;
    if(isinstance(httpheaderout, list) and httplibuse!="pycurl"):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheaderout, list) and httplibuse=="pycurl"):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list) and httplibuse!="pycurl"):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    if(isinstance(httpheadersentout, list) and httplibuse=="pycurl"):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    log.info("Downloading URL "+httpurl);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2"):
        if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
            strbuf = BytesIO(geturls_text.read());
            gzstrbuf = gzip.GzipFile(fileobj=strbuf);
            returnval_content = gzstrbuf.read()[:];
        if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
            returnval_content = geturls_text.read()[:];
        if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = geturls_text.read()[:];
            returnval_content = brotli.decompress(returnval_content);
        geturls_text.close();
    elif(httplibuse=="requests"):
        log.info("Downloading URL "+httpurl);
        if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
            strbuf = BytesIO(geturls_text.raw.read());
            gzstrbuf = gzip.GzipFile(fileobj=strbuf);
            returnval_content = gzstrbuf.read()[:];
        if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
            returnval_content = geturls_text.raw.read()[:];
        if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = geturls_text.raw.read()[:];
            returnval_content = brotli.decompress(returnval_content);
        geturls_text.close();
    elif(httplibuse=="pycurl"):
        log.info("Downloading URL "+httpurl);
        if(httpheaderout.get("Content-Encoding")=="gzip" or httpheaderout.get("Content-Encoding")=="deflate"):
            strbuf = BytesIO(retrieved_body.read());
            gzstrbuf = gzip.GzipFile(fileobj=strbuf);
            returnval_content = gzstrbuf.read()[:];
        if(httpheaderout.get("Content-Encoding")!="gzip" and httpheaderout.get("Content-Encoding")!="deflate" and httpheaderout.get("Content-Encoding")!="br"):
            returnval_content = retrieved_body.read()[:];
        if(httpheaderout.get("Content-Encoding")=="br" and havebrotli):
            returnval_content = retrieved_body.read()[:];
            returnval_content = brotli.decompress(returnval_content);
        geturls_text.close();
    elif(httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
        # unreachable after the early returns above; kept for the original structure
        return returnval;
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
    return returnval;
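# Illustrative usage (not part of the original file): fetch a page with the
# default urllib backend and inspect the pieces of the result dict.
#   ret = download_from_url("http://example.com/", httplibuse="urllib");
#   if(ret):
#       print(ret['Code'], ret['Reason']);          # e.g. 200 OK
#       print(ret['Headers'].get('Content-Type'));
#       print(len(ret['Content']), "bytes");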
1145 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpuseragent
=None, httpreferer
=None, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", ranges
=[None, None], buffersize
=524288, sleep
=-1, timeout
=10):
1146 global geturls_download_sleep
, tmpfileprefix
, tmpfilesuffix
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
, havehttpcore
, haveparamiko
, havepysftp
;
1147 exec_time_start
= time
.time();
1148 myhash
= hashlib
.new("sha1");
1149 if(sys
.version
[0]=="2"):
1150 myhash
.update(httpurl
);
1151 myhash
.update(str(buffersize
));
1152 myhash
.update(str(exec_time_start
));
1153 if(sys
.version
[0]>="3"):
1154 myhash
.update(httpurl
.encode('utf-8'));
1155 myhash
.update(str(buffersize
).encode('utf-8'));
1156 myhash
.update(str(exec_time_start
).encode('utf-8'));
1157 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
1159 sleep
= geturls_download_sleep
;
1160 if(httplibuse
=="urllib1" or httplibuse
=="urllib2" or httplibuse
=="request"):
1161 httplibuse
= "urllib";
1162 if(httplibuse
=="httplib1"):
1163 httplibuse
= "httplib";
1164 if(not haverequests
and httplibuse
=="requests"):
1165 httplibuse
= "urllib";
1166 if(not havehttpx
and httplibuse
=="httpx"):
1167 httplibuse
= "urllib";
1168 if(not havehttpx
and httplibuse
=="httpx2"):
1169 httplibuse
= "urllib";
1170 if(not havehttpcore
and httplibuse
=="httpcore"):
1171 httplibuse
= "urllib";
1172 if(not havehttpcore
and httplibuse
=="httpcore2"):
1173 httplibuse
= "urllib";
1174 if(not havemechanize
and httplibuse
=="mechanize"):
1175 httplibuse
= "urllib";
1176 if(not havepycurl
and httplibuse
=="pycurl"):
1177 httplibuse
= "urllib";
1178 if(not havehttplib2
and httplibuse
=="httplib2"):
1179 httplibuse
= "httplib";
1180 if(not haveparamiko
and httplibuse
=="sftp"):
1182 if(not haveparamiko
and httplibuse
=="pysftp"):
1184 urlparts
= urlparse
.urlparse(httpurl
);
1185 if(isinstance(httpheaders
, list)):
1186 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
1187 httpheaders
= fix_header_names(httpheaders
);
1188 if(ranges
[0] is not None):
1189 range_str
= "bytes="+str(range[0])+"-";
1190 if(ranges
[1] is not None and ranges
[1]>ranges
[0]):
1191 range_str
+= str(range[1]);
1192 if('Range' in httpheaders
):
1193 httpheaders
['Range'] = range_str
;
1195 httpuseragent
.update({'Range': range_str
});
1196 if(httpuseragent
is not None):
1197 if('User-Agent' in httpheaders
):
1198 httpheaders
['User-Agent'] = httpuseragent
;
1200 httpuseragent
.update({'User-Agent': httpuseragent
});
1201 if(httpreferer
is not None):
1202 if('Referer' in httpheaders
):
1203 httpheaders
['Referer'] = httpreferer
;
1205 httpuseragent
.update({'Referer': httpreferer
});
1206 if(urlparts
.username
is not None or urlparts
.password
is not None):
1207 if(sys
.version
[0]=="2"):
1208 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
));
1209 if(sys
.version
[0]>="3"):
1210 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
1211 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
1212 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
1213 if(httplibuse
=="urllib" or httplibuse
=="mechanize"):
1214 if(isinstance(httpheaders
, dict)):
1215 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
1216 if(httplibuse
=="pycurl"):
1217 if(isinstance(httpheaders
, dict)):
1218 httpheaders
= make_http_headers_from_dict_to_pycurl(httpheaders
);
1219 geturls_opener
.addheaders
= httpheaders
;
1221 if(httplibuse
=="urllib" or httplibuse
=="request"):
1223 geturls_request
= Request(httpurl
);
1224 if(httpmethod
=="GET"):
1225 geturls_text
= geturls_opener
.open(geturls_request
);
1226 elif(httpmethod
=="POST"):
1227 geturls_text
= geturls_opener
.open(geturls_request
, data
=postdata
);
1229 geturls_text
= geturls_opener
.open(geturls_request
);
1230 except HTTPError
as geturls_text_error
:
1231 geturls_text
= geturls_text_error
;
1232 log
.info("Error With URL "+httpurl
);
1234 log
.info("Error With URL "+httpurl
);
1236 except socket
.timeout
:
1237 log
.info("Error With URL "+httpurl
);
1239 except socket
.timeout
:
1240 log
.info("Error With URL "+httpurl
);
1242 httpcodeout
= geturls_text
.getcode();
1244 httpcodereason
= geturls_text
.reason
;
1245 except AttributeError:
1246 httpcodereason
= http_status_to_reason(geturls_text
.getcode());
1248 httpversionout
= geturls_text
.version
;
1249 except AttributeError:
1250 httpversionout
= "1.1";
1251 httpmethodout
= geturls_request
.get_method();
1252 httpurlout
= geturls_text
.geturl();
1253 httpheaderout
= geturls_text
.info();
1254 httpheadersentout
= httpheaders
;
1255 elif(httplibuse
=="httplib"):
1256 if(urlparts
[0]=="http"):
1257 httpconn
= HTTPConnection(urlparts
[1], timeout
=timeout
);
1258 elif(urlparts
[0]=="https"):
1259 httpconn
= HTTPSConnection(urlparts
[1], timeout
=timeout
);
1262 if(postdata
is not None and not isinstance(postdata
, dict)):
1263 postdata
= urlencode(postdata
);
1265 if(httpmethod
=="GET"):
1266 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1267 elif(httpmethod
=="POST"):
1268 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
1270 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1271 except socket
.timeout
:
1272 log
.info("Error With URL "+httpurl
);
1274 except socket
.gaierror
:
1275 log
.info("Error With URL "+httpurl
);
1277 except BlockingIOError
:
1278 log
.info("Error With URL "+httpurl
);
1280 geturls_text
= httpconn
.getresponse();
1281 httpcodeout
= geturls_text
.status
;
1282 httpcodereason
= geturls_text
.reason
;
1283 if(geturls_text
.version
=="10"):
1284 httpversionout
= "1.0";
1286 httpversionout
= "1.1";
1287 httpmethodout
= geturls_text
._method
;
1288 httpurlout
= geturls_text
.geturl();
1289 httpheaderout
= geturls_text
.getheaders();
1290 httpheadersentout
= httpheaders
;
1291 elif(httplibuse
=="httplib2"):
1293 if(httpmethod
=="GET"):
1294 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1295 elif(httpmethod
=="POST"):
1296 httpconn
.request("GET", urlparts
[2], body
=postdata
, headers
=httpheaders
);
1298 httpconn
.request("GET", urlparts
[2], headers
=httpheaders
);
1299 except socket
.timeout
:
1300 log
.info("Error With URL "+httpurl
);
1302 except socket
.gaierror
:
1303 log
.info("Error With URL "+httpurl
);
1305 except BlockingIOError
:
1306 log
.info("Error With URL "+httpurl
);
1308 geturls_text
= httpconn
.getresponse();
1309 httpcodeout
= geturls_text
.status
;
1310 httpcodereason
= geturls_text
.reason
;
1311 if(geturls_text
.version
=="10"):
1312 httpversionout
= "1.0";
1314 httpversionout
= "1.1";
1315 httpmethodout
= httpmethod
;
1316 httpurlout
= geturls_text
.geturl();
1317 httpheaderout
= geturls_text
.getheaders();
1318 httpheadersentout
= httpheaders
;
    elif(httplibuse=="urllib3" or httplibuse=="request3"):
        timeout = urllib3.util.Timeout(connect=timeout, read=timeout);
        urllib_pool = urllib3.PoolManager(headers=httpheaders, timeout=timeout);
        try:
            if(httpmethod=="GET"):
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
            elif(httpmethod=="POST"):
                geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False);
            else:
                geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False);
        except urllib3.exceptions.ConnectTimeoutError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.NewConnectionError:
            log.info("Error With URL "+httpurl);
            return False;
        except urllib3.exceptions.MaxRetryError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except ValueError:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = geturls_text.reason;
        if(geturls_text.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        httpheadersentout = httpheaders;
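    # preload_content=False keeps urllib3 from buffering the whole body in
    # memory, so the download loop below can stream it in buffersize chunks.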
    elif(httplibuse=="requests"):
        try:
            reqsession = requests.Session();
            if(httpmethod=="GET"):
                geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie, stream=True);
            elif(httpmethod=="POST"):
                geturls_text = reqsession.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie, stream=True);
            else:
                geturls_text = reqsession.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie, stream=True);
        except requests.exceptions.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except requests.exceptions.ConnectionError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason;
        if(geturls_text.raw.version==10):
            httpversionout = "1.0";
        else:
            httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.url;
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
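    # stream=True defers the body download and exposes the underlying urllib3
    # response as geturls_text.raw, which the copy loop reads incrementally.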
    elif(httplibuse=="httpx"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=False, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason_phrase;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
    elif(httplibuse=="httpx2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
            elif(httpmethod=="POST"):
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.post(httpurl, timeout=timeout, data=postdata, headers=httpheaders, cookies=httpcookie);
            else:
                httpx_pool = httpx.Client(http1=True, http2=True, trust_env=True);
                geturls_text = httpx_pool.get(httpurl, timeout=timeout, headers=httpheaders, cookies=httpcookie);
        except httpx.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpx.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status_code;
        httpcodereason = geturls_text.reason_phrase;
        httpversionout = geturls_text.http_version;
        httpmethodout = httpmethod;
        httpurlout = str(geturls_text.url);
        httpheaderout = geturls_text.headers;
        httpheadersentout = geturls_text.request.headers;
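    # The httpx2 branch only differs from httpx in negotiating HTTP/2; that
    # path needs the optional h2 package (the httpx[http2] extra) installed.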
    elif(httplibuse=="httpcore"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=False);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
    elif(httplibuse=="httpcore2"):
        try:
            if(httpmethod=="GET"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
            elif(httpmethod=="POST"):
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("POST", httpurl, content=postdata, headers=httpheaders);
            else:
                httpx_pool = httpcore.ConnectionPool(http1=True, http2=True);
                geturls_text = httpx_pool.request("GET", httpurl, headers=httpheaders);
        except httpcore.ConnectTimeout:
            log.info("Error With URL "+httpurl);
            return False;
        except httpcore.ConnectError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.status;
        httpcodereason = http_status_to_reason(geturls_text.status);
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = str(httpurl);
        httpheaderout = geturls_text.headers;
        httpheadersentout = httpheaders;
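    # httpcore is a bare transport: no redirects, cookies, or reason phrases.
    # The reason text is therefore derived from the numeric status code, and
    # the request URL is echoed back unchanged.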
    elif(httplibuse=="mechanize"):
        geturls_opener = mechanize.Browser();
        if(isinstance(httpheaders, dict)):
            httpheaders = make_http_headers_from_dict_to_list(httpheaders);
        time.sleep(sleep);
        geturls_opener.addheaders = httpheaders;
        geturls_opener.set_cookiejar(httpcookie);
        geturls_opener.set_handle_robots(False);
        if(postdata is not None and not isinstance(postdata, dict)):
            postdata = urlencode(postdata);
        try:
            if(httpmethod=="GET"):
                geturls_text = geturls_opener.open(httpurl);
            elif(httpmethod=="POST"):
                geturls_text = geturls_opener.open(httpurl, data=postdata);
            else:
                geturls_text = geturls_opener.open(httpurl);
        except mechanize.HTTPError as geturls_text_error:
            geturls_text = geturls_text_error;
            log.info("Error With URL "+httpurl);
        except URLError:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.code;
        httpcodereason = geturls_text.msg;
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.geturl();
        httpheaderout = geturls_text.info();
        reqhead = geturls_opener.request;
        httpheadersentout = reqhead.header_items();
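    # mechanize takes its headers as a list of (name, value) tuples and needs
    # robots.txt handling disabled for plain downloads; the headers actually
    # sent are recovered from the Browser's last request object.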
    elif(httplibuse=="pycurl"):
        retrieved_body = BytesIO();
        retrieved_headers = BytesIO();
        try:
            if(httpmethod=="GET"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            elif(httpmethod=="POST"):
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.setopt(geturls_text.POST, True);
                geturls_text.setopt(geturls_text.POSTFIELDS, postdata);
                geturls_text.perform();
            else:
                geturls_text = pycurl.Curl();
                geturls_text.setopt(geturls_text.URL, httpurl);
                geturls_text.setopt(geturls_text.WRITEFUNCTION, retrieved_body.write);
                geturls_text.setopt(geturls_text.HTTPHEADER, httpheaders);
                geturls_text.setopt(geturls_text.HEADERFUNCTION, retrieved_headers.write);
                geturls_text.setopt(geturls_text.FOLLOWLOCATION, True);
                geturls_text.setopt(geturls_text.TIMEOUT, timeout);
                geturls_text.perform();
            retrieved_headers.seek(0);
            if(sys.version[0]=="2"):
                pycurlhead = retrieved_headers.read();
            if(sys.version[0]>="3"):
                pycurlhead = retrieved_headers.read().decode('UTF-8');
            pyhttpverinfo = re.findall(r'^HTTP/([0-9.]+) (\d+) ([A-Za-z\s]+)$', pycurlhead.splitlines()[0])[0];
            pycurlheadersout = make_http_headers_from_pycurl_to_dict(pycurlhead);
            retrieved_body.seek(0);
        except socket.timeout:
            log.info("Error With URL "+httpurl);
            return False;
        except socket.gaierror:
            log.info("Error With URL "+httpurl);
            return False;
        except pycurl.error:
            log.info("Error With URL "+httpurl);
            return False;
        httpcodeout = geturls_text.getinfo(geturls_text.HTTP_CODE);
        httpcodereason = http_status_to_reason(geturls_text.getinfo(geturls_text.HTTP_CODE));
        httpversionout = "1.1";
        httpmethodout = httpmethod;
        httpurlout = geturls_text.getinfo(geturls_text.EFFECTIVE_URL);
        httpheaderout = pycurlheadersout;
        httpheadersentout = httpheaders;
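    # pycurl exposes no response object, so the body and headers are captured
    # through WRITEFUNCTION/HEADERFUNCTION into BytesIO buffers, and the
    # status line is split back out of the raw header block afterwards.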
    elif(httplibuse=="ftp"):
        geturls_text = download_file_from_ftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    elif(httplibuse=="sftp"):
        geturls_text = download_file_from_sftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
        if(downloadsize is not None):
            downloadsize = int(downloadsize);
        if downloadsize is None: downloadsize = 0;
    elif(httplibuse=="pysftp"):
        geturls_text = download_file_from_pysftp_file(httpurl);
        if(not geturls_text):
            return False;
        geturls_text.seek(0, 2);
        downloadsize = geturls_text.tell();
        geturls_text.seek(0, 0);
    else:
        return False;
    if(isinstance(httpheaderout, list) and httplibuse!="pycurl"):
        httpheaderout = dict(make_http_headers_from_list_to_dict(httpheaderout));
    if(isinstance(httpheaderout, list) and httplibuse=="pycurl"):
        httpheaderout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheaderout)));
    if(sys.version[0]=="2"):
        try:
            prehttpheaderout = httpheaderout;
            httpheaderkeys = httpheaderout.keys();
            imax = len(httpheaderkeys);
            ic = 0;
            httpheaderout = {};
            while(ic < imax):
                httpheaderout.update({httpheaderkeys[ic]: prehttpheaderout[httpheaderkeys[ic]]});
                ic += 1;
        except AttributeError:
            pass;
    httpheaderout = fix_header_names(httpheaderout);
    if(isinstance(httpheadersentout, list) and httplibuse!="pycurl"):
        httpheadersentout = dict(make_http_headers_from_list_to_dict(httpheadersentout));
    if(isinstance(httpheadersentout, list) and httplibuse=="pycurl"):
        httpheadersentout = dict(make_http_headers_from_pycurl_to_dict("\r\n".join(httpheadersentout)));
    httpheadersentout = fix_header_names(httpheadersentout);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="requests" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2" or httplibuse=="pycurl"):
        downloadsize = httpheaderout.get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
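    # Content-Length may legitimately be missing (e.g. chunked transfers), in
    # which case downloadsize stays 0 and the progress percentage is skipped.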
    log.info("Downloading URL "+httpurl);
    if(httplibuse=="urllib" or httplibuse=="request" or httplibuse=="request3" or httplibuse=="httplib" or httplibuse=="httplib2" or httplibuse=="urllib3" or httplibuse=="mechanize" or httplibuse=="httpx" or httplibuse=="httpx2" or httplibuse=="httpcore" or httplibuse=="httpcore2" or httplibuse=="ftp" or httplibuse=="sftp" or httplibuse=="pysftp"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            while True:
                databytes = geturls_text.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    elif(httplibuse=="requests"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            while True:
                databytes = geturls_text.raw.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    elif(httplibuse=="pycurl"):
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name;
            try:
                os.utime(tmpfilename, (time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(httpheaderout.get('Last-Modified')).timetuple())));
            except AttributeError:
                try:
                    os.utime(tmpfilename, (time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(httpheaderout.get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
                except ValueError:
                    pass;
            except ValueError:
                pass;
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': httpheaderout, 'Version': httpversionout, 'Method': httpmethodout, 'HeadersSent': httpheadersentout, 'URL': httpurlout, 'Code': httpcodeout, 'Reason': httpcodereason};
            while True:
                databytes = retrieved_body.read(buffersize);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
    else:
        return False;
    geturls_text.close();
    exec_time_end = time.time();
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
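# A minimal usage sketch (assuming a reachable URL):
#   ret = download_from_url_file("http://example.com/file.bin", buffersize=524288);
#   ret['Filename']  -> path of the temporary file holding the body
#   ret['Code'], ret['Reason'], ret['Headers'], ret['DownloadTime'], ...
# The caller owns the temporary file and must move or delete it; the
# download_from_url_to_file() wrapper below does exactly that.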
def download_from_url_to_file(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, httplibuse="urllib", outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    global geturls_download_sleep, haverequests, havemechanize, havepycurl, havehttplib2, haveurllib3, havehttpx, havehttpcore, haveparamiko, havepysftp;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(httplibuse=="urllib1" or httplibuse=="urllib2" or httplibuse=="request"):
        httplibuse = "urllib";
    if(httplibuse=="httplib1"):
        httplibuse = "httplib";
    if(not haverequests and httplibuse=="requests"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx"):
        httplibuse = "urllib";
    if(not havehttpx and httplibuse=="httpx2"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore"):
        httplibuse = "urllib";
    if(not havehttpcore and httplibuse=="httpcore2"):
        httplibuse = "urllib";
    if(not havemechanize and httplibuse=="mechanize"):
        httplibuse = "urllib";
    if(not havepycurl and httplibuse=="pycurl"):
        httplibuse = "urllib";
    if(not havehttplib2 and httplibuse=="httplib2"):
        httplibuse = "httplib";
    if(not haveparamiko and httplibuse=="sftp"):
        return False;
    if(not havepysftp and httplibuse=="pysftp"):
        return False;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        try:
            os.utime(filepath, (time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple()), time.mktime(email.utils.parsedate_to_datetime(pretmpfilename.get('Headers').get('Last-Modified')).timetuple())));
        except AttributeError:
            try:
                os.utime(filepath, (time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple()), time.mktime(datetime.datetime.strptime(pretmpfilename.get('Headers').get('Last-Modified'), "%a, %d %b %Y %H:%M:%S %Z").timetuple())));
            except ValueError:
                pass;
        except ValueError:
            pass;
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    if(outfile=="-"):
        pretmpfilename = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, httplibuse, ranges, buffersize[0], sleep, timeout);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        with open(tmpfilename, 'rb') as ft:
            f = BytesIO();
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
            f.seek(0);
            fdata = f.getvalue();
            f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'Version': pretmpfilename['Version'], 'Method': pretmpfilename['Method'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code'], 'Reason': pretmpfilename['Reason']};
    return returnval;
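# The *_with_<library> helpers below are one-line conveniences that pin the
# httplibuse argument; they keep the public API stable while the dispatch
# above does the real work.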
def download_from_url_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", sleep, timeout);
    return returnval;

def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", sleep, timeout);
    return returnval;

def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", sleep, timeout);
    return returnval;

def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", sleep, timeout);
    return returnval;

def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", sleep, timeout);
    return returnval;

def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", sleep, timeout);
    return returnval;

def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", sleep, timeout);
    return returnval;

def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", sleep, timeout);
    return returnval;

def download_from_url_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", sleep, timeout);
    return returnval;

def download_from_url_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", sleep, timeout);
    return returnval;

def download_from_url_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", sleep, timeout);
    return returnval;

def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", sleep, timeout);
    return returnval;

def download_from_url_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pycurl", sleep, timeout);
    return returnval;

def download_from_url_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", sleep, timeout);
    return returnval;

def download_from_url_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", sleep, timeout);
    return returnval;

def download_from_url_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1, timeout=10):
    returnval = download_from_url(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", sleep, timeout);
    return returnval;
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pycurl", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, ranges=[None, None], buffersize=524288, sleep=-1, timeout=10):
    returnval = download_from_url_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", ranges, buffersize, sleep, timeout);
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "request3", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httplib2", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "urllib3", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "requests", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_httpx2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpx2", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_httpcore(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_httpcore2(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "httpcore2", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "mechanize", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_pycurl(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pycurl", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_ftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "ftp", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_sftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "sftp", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;

def download_from_url_to_file_with_pysftp(httpurl, httpheaders=geturls_headers, httpuseragent=None, httpreferer=None, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), ranges=[None, None], buffersize=[524288, 524288], sleep=-1, timeout=10):
    returnval = download_from_url_to_file(httpurl, httpheaders, httpuseragent, httpreferer, httpcookie, httpmethod, postdata, "pysftp", outfile, outpath, ranges, buffersize, sleep, timeout);
    return returnval;
def download_file_from_ftp_file(url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftpfile = BytesIO();
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write);
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def download_file_from_ftp_string(url):
    ftpfile = download_file_from_ftp_file(url);
    return ftpfile.read();
def upload_file_to_ftp_file(ftpfile, url):
    urlparts = urlparse.urlparse(url);
    file_name = os.path.basename(urlparts.path);
    file_dir = os.path.dirname(urlparts.path);
    if(urlparts.username is not None):
        ftp_username = urlparts.username;
    else:
        ftp_username = "anonymous";
    if(urlparts.password is not None):
        ftp_password = urlparts.password;
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous";
    else:
        ftp_password = "";
    if(urlparts.scheme=="ftp"):
        ftp = FTP();
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS();
    else:
        return False;
    if(urlparts.scheme=="http" or urlparts.scheme=="https"):
        return False;
    ftp_port = urlparts.port;
    if(urlparts.port is None):
        ftp_port = 21;
    try:
        ftp.connect(urlparts.hostname, ftp_port);
    except socket.gaierror:
        log.info("Error With URL "+url);
        return False;
    except socket.timeout:
        log.info("Error With URL "+url);
        return False;
    ftp.login(ftp_username, ftp_password);
    if(urlparts.scheme=="ftps"):
        ftp.prot_p();
    ftp.storbinary("STOR "+urlparts.path, ftpfile);
    ftp.close();
    ftpfile.seek(0, 0);
    return ftpfile;
def upload_file_to_ftp_string(ftpstring, url):
    ftpfileo = BytesIO(ftpstring);
    ftpfile = upload_file_to_ftp_file(ftpfileo, url);
    ftpfileo.close();
    return ftpfile;
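# The FTP helpers buffer the whole transfer in a BytesIO object, so they are
# memory-bound: fine for small files, not for multi-gigabyte transfers.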
if(haveparamiko):
    def download_file_from_sftp_file(url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        ssh = paramiko.SSHClient();
        ssh.load_system_host_keys();
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
        try:
            ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp = ssh.open_sftp();
        sftpfile = BytesIO();
        sftp.getfo(urlparts.path, sftpfile);
        sftp.close();
        ssh.close();
        sftpfile.seek(0, 0);
        return sftpfile;
if(not haveparamiko):
    def download_file_from_sftp_file(url):
        return False;

if(haveparamiko):
    def download_file_from_sftp_string(url):
        sftpfile = download_file_from_sftp_file(url);
        return sftpfile.read();
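# AutoAddPolicy blindly trusts unknown host keys, which is convenient for ad
# hoc downloads but leaves the connection open to man-in-the-middle attacks;
# pin host keys for anything security-sensitive.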
if(not haveparamiko):
    def download_file_from_sftp_string(url):
        return False;

if(haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        sftp_port = urlparts.port;
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        ssh = paramiko.SSHClient();
        ssh.load_system_host_keys();
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy());
        try:
            ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp = ssh.open_sftp();
        sftp.putfo(sftpfile, urlparts.path);
        sftp.close();
        ssh.close();
        sftpfile.seek(0, 0);
        return sftpfile;
if(not haveparamiko):
    def upload_file_to_sftp_file(sftpfile, url):
        return False;

if(haveparamiko):
    def upload_file_to_sftp_string(sftpstring, url):
        sftpfileo = BytesIO(sftpstring);
        sftpfile = upload_file_to_sftp_file(sftpfileo, url);
        sftpfileo.close();
        return sftpfile;
if(not haveparamiko):
    def upload_file_to_sftp_string(sftpstring, url):
        return False;
if(havepysftp):
    def download_file_from_pysftp_file(url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        sftp_port = urlparts.port;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        try:
            sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftpfile = BytesIO();
        sftp.getfo(urlparts.path, sftpfile);
        sftp.close();
        sftpfile.seek(0, 0);
        return sftpfile;
if(not havepysftp):
    def download_file_from_pysftp_file(url):
        return False;

if(havepysftp):
    def download_file_from_pysftp_string(url):
        sftpfile = download_file_from_pysftp_file(url);
        return sftpfile.read();
if(not havepysftp):
    def download_file_from_pysftp_string(url):
        return False;
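# pysftp is a thin wrapper around paramiko; its Connection object doubles as
# the SFTP client, so there is no separate ssh/open_sftp step in these
# variants, and getfo/putfo are called on the Connection directly.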
if(havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        urlparts = urlparse.urlparse(url);
        file_name = os.path.basename(urlparts.path);
        file_dir = os.path.dirname(urlparts.path);
        sftp_port = urlparts.port;
        if(urlparts.scheme=="http" or urlparts.scheme=="https"):
            return False;
        if(urlparts.port is None):
            sftp_port = 22;
        else:
            sftp_port = urlparts.port;
        if(urlparts.username is not None):
            sftp_username = urlparts.username;
        else:
            sftp_username = "anonymous";
        if(urlparts.password is not None):
            sftp_password = urlparts.password;
        elif(urlparts.password is None and urlparts.username=="anonymous"):
            sftp_password = "anonymous";
        else:
            sftp_password = "";
        if(urlparts.scheme!="sftp"):
            return False;
        try:
            sftp = pysftp.Connection(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password);
        except paramiko.ssh_exception.SSHException:
            return False;
        except socket.gaierror:
            log.info("Error With URL "+url);
            return False;
        except socket.timeout:
            log.info("Error With URL "+url);
            return False;
        sftp.putfo(sftpfile, urlparts.path);
        sftp.close();
        sftpfile.seek(0, 0);
        return sftpfile;
if(not havepysftp):
    def upload_file_to_pysftp_file(sftpfile, url):
        return False;

if(havepysftp):
    def upload_file_to_pysftp_string(sftpstring, url):
        sftpfileo = BytesIO(sftpstring);
        sftpfile = upload_file_to_pysftp_file(sftpfileo, url);
        sftpfileo.close();
        return sftpfile;
if(not havepysftp):
    def upload_file_to_pysftp_string(sftpstring, url):
        return False;