4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/13/2023 Ver. 0.8.4 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
49 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
53 if(sys
.version
[0]=="2"):
55 from cStringIO
import StringIO
;
57 from StringIO
import StringIO
;
58 # From http://python-future.org/compatible_idioms.html
59 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
60 from urllib
import urlencode
;
61 from urllib
import urlopen
as urlopenalt
;
62 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
;
63 import urllib2
, urlparse
, cookielib
;
64 from httplib
import HTTPConnection
, HTTPSConnection
;
65 if(sys
.version
[0]>="3"):
66 from io
import StringIO
, BytesIO
;
67 # From http://python-future.org/compatible_idioms.html
68 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
69 from urllib
.request
import urlopen
, Request
, install_opener
;
70 from urllib
.error
import HTTPError
;
71 import urllib
.request
as urllib2
;
72 import urllib
.parse
as urlparse
;
73 import http
.cookiejar
as cookielib
;
74 from http
.client
import HTTPConnection
, HTTPSConnection
;
76 __program_name__
= "PyWWW-Get";
77 __program_alt_name__
= "PyWWWGet";
78 __program_small_name__
= "wwwget";
79 __project__
= __program_name__
;
80 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
81 __version_info__
= (0, 8, 4, "RC 1", 1);
82 __version_date_info__
= (2023, 9, 13, "RC 1", 1);
83 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
84 __revision__
= __version_info__
[3];
85 __revision_id__
= "$Id$";
86 if(__version_info__
[4] is not None):
87 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
88 if(__version_info__
[4] is None):
89 __version_date_plusrc__
= __version_date__
;
90 if(__version_info__
[3] is not None):
91 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
92 if(__version_info__
[3] is None):
93 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
95 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
97 pytempdir
= tempfile
.gettempdir();
99 geturls_cj
= cookielib
.CookieJar();
100 geturls_ua_firefox_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0";
101 geturls_ua_seamonkey_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3";
102 geturls_ua_chrome_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36";
103 geturls_ua_chromium_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36";
104 geturls_ua_palemoon_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3";
105 geturls_ua_opera_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.54";
106 geturls_ua_vivaldi_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.183 Safari/537.36 Vivaldi/1.96.1147.52";
107 geturls_ua_internet_explorer_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko";
108 geturls_ua_microsoft_edge_windows7
= "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134";
109 geturls_ua_pywwwget_python
= "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname
=__project__
, prover
=__version__
, prourl
=__project_url__
);
110 if(platform
.python_implementation()!=""):
111 geturls_ua_pywwwget_python_alt
= "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver
=platform
.system()+" "+platform
.release(), archtype
=platform
.machine(), prourl
=__project_url__
, pyimp
=platform
.python_implementation(), pyver
=platform
.python_version(), proname
=__project__
, prover
=__version__
);
112 if(platform
.python_implementation()==""):
113 geturls_ua_pywwwget_python_alt
= "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver
=platform
.system()+" "+platform
.release(), archtype
=platform
.machine(), prourl
=__project_url__
, pyimp
="Python", pyver
=platform
.python_version(), proname
=__project__
, prover
=__version__
);
114 geturls_ua_googlebot_google
= "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
115 geturls_ua_googlebot_google_old
= "Googlebot/2.1 (+http://www.google.com/bot.html)";
116 geturls_ua
= geturls_ua_firefox_windows7
;
117 geturls_headers_firefox_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
118 geturls_headers_seamonkey_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
119 geturls_headers_chrome_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
120 geturls_headers_chromium_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
121 geturls_headers_palemoon_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
122 geturls_headers_opera_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
123 geturls_headers_vivaldi_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
124 geturls_headers_internet_explorer_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
125 geturls_headers_microsoft_edge_windows7
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
126 geturls_headers_pywwwget_python
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
127 geturls_headers_pywwwget_python_alt
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
128 geturls_headers_googlebot_google
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
129 geturls_headers_googlebot_google_old
= {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old
, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
130 geturls_headers
= geturls_headers_firefox_windows7
;
131 geturls_download_sleep
= 0;
133 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
134 if(outtype
=="print" and dbgenable
):
137 elif(outtype
=="log" and dbgenable
):
138 logging
.info(dbgtxt
);
140 elif(outtype
=="warning" and dbgenable
):
141 logging
.warning(dbgtxt
);
143 elif(outtype
=="error" and dbgenable
):
144 logging
.error(dbgtxt
);
146 elif(outtype
=="critical" and dbgenable
):
147 logging
.critical(dbgtxt
);
149 elif(outtype
=="exception" and dbgenable
):
150 logging
.exception(dbgtxt
);
152 elif(outtype
=="logalt" and dbgenable
):
153 logging
.log(dgblevel
, dbgtxt
);
155 elif(outtype
=="debug" and dbgenable
):
156 logging
.debug(dbgtxt
);
164 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
165 verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
168 def add_url_param(url
, **params
):
170 parts
= list(urlparse
.urlsplit(url
));
171 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
173 parts
[n
]=urlencode(d
);
174 return urlparse
.urlunsplit(parts
);
# Extend the process search path so helper executables that live next to
# this script, or in the current working directory, can be found later by
# which_exec().  Equivalent to appending ``<script dir>`` and ``<cwd>``
# with the platform path separator.
os.environ["PATH"] = os.pathsep.join(
    [os.environ["PATH"],
     os.path.dirname(os.path.realpath(__file__)),
     os.getcwd()]);
def which_exec(execfile):
    """Search the directories listed in the PATH environment variable for
    *execfile* and return the full path of the first match.

    Returns None when no matching file exists.

    Fixes: the original split PATH on a hard-coded ":" and joined with a
    hard-coded "/", which breaks on Windows; the module itself extends
    PATH with os.pathsep, so use os.pathsep / os.path.join here too.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    # Explicit not-found result (the original fell off the end of the loop).
    return None;
182 def listize(varlist
):
190 newlistreg
.update({ilx
: varlist
[il
]});
191 newlistrev
.update({varlist
[il
]: ilx
});
194 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
197 def twolistize(varlist
):
207 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
208 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
209 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
210 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
213 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
214 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
215 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
218 def arglistize(proexec
, *varlist
):
222 newarglist
= [proexec
];
224 if varlist
[il
][0] is not None:
225 newarglist
.append(varlist
[il
][0]);
226 if varlist
[il
][1] is not None:
227 newarglist
.append(varlist
[il
][1]);
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format an elapsed time given in seconds as ``H:MM:SS.ss``.

    Hours are not zero-padded; minutes are padded to two digits and the
    seconds field to five characters with two decimal places
    (e.g. 3661 -> "1:01:01.00").
    """
    whole_hours = int(sec_elapsed / 3600);
    whole_minutes = int((sec_elapsed % 3600) / 60);
    leftover_seconds = sec_elapsed % 60.0;
    return "{}:{:>02}:{:>05.2f}".format(whole_hours, whole_minutes, leftover_seconds);
239 # get_readable_size by Lipis
240 # http://stackoverflow.com/posts/14998888/revisions
241 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
243 if(unit
!="IEC" and unit
!="SI"):
246 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
247 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
250 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
251 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
256 if abs(bytes
) < unitsize
:
257 strformat
= "%3."+str(precision
)+"f%s";
258 pre_return_val
= (strformat
% (bytes
, unit
));
259 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
260 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
261 alt_return_val
= pre_return_val
.split();
262 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
265 strformat
= "%."+str(precision
)+"f%s";
266 pre_return_val
= (strformat
% (bytes
, "YiB"));
267 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
268 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
269 alt_return_val
= pre_return_val
.split();
270 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
273 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
275 usehashtypes
= usehashtypes
.lower();
276 getfilesize
= os
.path
.getsize(infile
);
277 return_val
= get_readable_size(getfilesize
, precision
, unit
);
279 hashtypelist
= usehashtypes
.split(",");
280 openfile
= open(infile
, "rb");
281 filecontents
= openfile
.read();
284 listnumend
= len(hashtypelist
);
285 while(listnumcount
< listnumend
):
286 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
287 hashtypelistup
= hashtypelistlow
.upper();
288 filehash
= hashlib
.new(hashtypelistup
);
289 filehash
.update(filecontents
);
290 filegethash
= filehash
.hexdigest();
291 return_val
.update({hashtypelistup
: filegethash
});
295 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
297 usehashtypes
= usehashtypes
.lower();
298 getfilesize
= len(instring
);
299 return_val
= get_readable_size(getfilesize
, precision
, unit
);
301 hashtypelist
= usehashtypes
.split(",");
303 listnumend
= len(hashtypelist
);
304 while(listnumcount
< listnumend
):
305 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
306 hashtypelistup
= hashtypelistlow
.upper();
307 filehash
= hashlib
.new(hashtypelistup
);
308 if(sys
.version
[0]=="2"):
309 filehash
.update(instring
);
310 if(sys
.version
[0]>="3"):
311 filehash
.update(instring
.encode('utf-8'));
312 filegethash
= filehash
.hexdigest();
313 return_val
.update({hashtypelistup
: filegethash
});
317 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
318 if isinstance(headers
, dict):
320 if(sys
.version
[0]=="2"):
321 for headkey
, headvalue
in headers
.iteritems():
322 returnval
.append((headkey
, headvalue
));
323 if(sys
.version
[0]>="3"):
324 for headkey
, headvalue
in headers
.items():
325 returnval
.append((headkey
, headvalue
));
326 elif isinstance(headers
, list):
332 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
333 if isinstance(headers
, dict):
335 if(sys
.version
[0]=="2"):
336 for headkey
, headvalue
in headers
.iteritems():
337 returnval
.append(headkey
+": "+headvalue
);
338 if(sys
.version
[0]>="3"):
339 for headkey
, headvalue
in headers
.items():
340 returnval
.append(headkey
+": "+headvalue
);
341 elif isinstance(headers
, list):
347 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
348 if isinstance(headers
, list):
353 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
355 elif isinstance(headers
, dict):
361 def get_httplib_support(checkvalue
=None):
362 global haverequests
, havemechanize
;
364 returnval
.append("httplib");
366 returnval
.append("httplib2");
367 returnval
.append("urllib");
369 returnval
.append("urllib3");
370 returnval
.append("request3");
371 returnval
.append("request");
373 returnval
.append("requests");
375 returnval
.append("mechanize");
376 if(not checkvalue
is None):
377 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
378 checkvalue
= "urllib";
379 if(checkvalue
=="httplib1"):
380 checkvalue
= "httplib";
381 if(checkvalue
in returnval
):
387 def check_httplib_support(checkvalue
="urllib"):
388 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
389 checkvalue
= "urllib";
390 if(checkvalue
=="httplib1"):
391 checkvalue
= "httplib";
392 returnval
= get_httplib_support(checkvalue
);
395 def get_httplib_support_list():
396 returnval
= get_httplib_support(None);
399 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, postdata
=None, httplibuse
="urllib", sleep
=-1):
400 global geturls_download_sleep
, haverequests
, havemechanize
;
402 sleep
= geturls_download_sleep
;
403 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
404 httplibuse
= "urllib";
405 if(httplibuse
=="httplib1"):
406 httplibuse
= "httplib";
407 if(not haverequests
and httplibuse
=="requests"):
408 httplibuse
= "urllib";
409 if(not havemechanize
and httplibuse
=="mechanize"):
410 httplibuse
= "urllib";
411 if(not havehttplib2
and httplibuse
=="httplib2"):
412 httplibuse
= "httplib";
413 if(httplibuse
=="urllib"):
414 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpcookie
, sleep
);
415 elif(httplibuse
=="request"):
416 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, sleep
);
417 elif(httplibuse
=="request3"):
418 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, sleep
);
419 elif(httplibuse
=="httplib"):
420 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpcookie
, sleep
);
421 elif(httplibuse
=="httplib2"):
422 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpcookie
, sleep
);
423 elif(httplibuse
=="urllib3"):
424 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpcookie
, sleep
);
425 elif(httplibuse
=="requests"):
426 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpcookie
, sleep
);
427 elif(httplibuse
=="mechanize"):
428 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpcookie
, sleep
);
433 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1):
434 global geturls_download_sleep
, haverequests
, havemechanize
;
436 sleep
= geturls_download_sleep
;
437 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
438 httplibuse
= "urllib";
439 if(httplibuse
=="httplib1"):
440 httplibuse
= "httplib";
441 if(not haverequests
and httplibuse
=="requests"):
442 httplibuse
= "urllib";
443 if(not havemechanize
and httplibuse
=="mechanize"):
444 httplibuse
= "urllib";
445 if(not havehttplib2
and httplibuse
=="httplib2"):
446 httplibuse
= "httplib";
447 if(httplibuse
=="urllib"):
448 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
449 elif(httplibuse
=="request"):
450 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
451 elif(httplibuse
=="request3"):
452 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
453 elif(httplibuse
=="httplib"):
454 returnval
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
455 elif(httplibuse
=="httplib2"):
456 returnval
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
457 elif(httplibuse
=="urllib3"):
458 returnval
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
459 elif(httplibuse
=="requests"):
460 returnval
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
461 elif(httplibuse
=="mechanize"):
462 returnval
= download_from_url_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
, sleep
);
467 def download_from_url_to_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, postdata
=None, httplibuse
="urllib", outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
468 global geturls_download_sleep
, haverequests
, havemechanize
;
470 sleep
= geturls_download_sleep
;
471 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
472 httplibuse
= "urllib";
473 if(httplibuse
=="httplib1"):
474 httplibuse
= "httplib";
475 if(not haverequests
and httplibuse
=="requests"):
476 httplibuse
= "urllib";
477 if(not havemechanize
and httplibuse
=="mechanize"):
478 httplibuse
= "urllib";
479 if(not havehttplib2
and httplibuse
=="httplib2"):
480 httplibuse
= "httplib";
481 if(httplibuse
=="urllib"):
482 returnval
= download_from_url_to_file_with_urllib(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
483 elif(httplibuse
=="request"):
484 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
485 elif(httplibuse
=="request3"):
486 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
487 elif(httplibuse
=="httplib"):
488 returnval
= download_from_url_to_file_with_httplib(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
489 elif(httplibuse
=="httplib2"):
490 returnval
= download_from_url_to_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
491 elif(httplibuse
=="urllib3"):
492 returnval
= download_from_url_to_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
493 elif(httplibuse
=="requests"):
494 returnval
= download_from_url_to_file_with_requests(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
495 elif(httplibuse
=="mechanize"):
496 returnval
= download_from_url_to_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, postdata
, outfile
, outpath
, buffersize
, sleep
);
501 def download_from_url_with_urllib(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, postdata
=None, sleep
=-1):
502 global geturls_download_sleep
;
504 sleep
= geturls_download_sleep
;
505 urlparts
= urlparse
.urlparse(httpurl
);
506 if(urlparts
.username
is not None or urlparts
.password
is not None):
507 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
508 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
509 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(httpcookie
));
510 if(isinstance(httpheaders
, dict)):
511 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
512 geturls_opener
.addheaders
= httpheaders
;
514 geturls_text
= geturls_opener
.open(httpurl
);
515 log
.info("Downloading URL "+httpurl
);
516 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
517 if(sys
.version
[0]=="2"):
518 strbuf
= StringIO(geturls_text
.read());
519 if(sys
.version
[0]>="3"):
520 strbuf
= BytesIO(geturls_text
.read());
521 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
522 returnval_content
= gzstrbuf
.read()[:];
523 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
524 returnval_content
= geturls_text
.read()[:];
525 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': dict(geturls_text
.info()), 'URL': geturls_text
.geturl(), 'Code': geturls_text
.getcode()};
526 geturls_text
.close();
529 def download_from_url_file_with_urllib(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, postdata
=None, buffersize
=524288, sleep
=-1):
530 global geturls_download_sleep
, tmpfileprefix
, tmpfilesuffix
;
531 exec_time_start
= time
.time();
532 myhash
= hashlib
.new("sha1");
533 if(sys
.version
[0]=="2"):
534 myhash
.update(httpurl
);
535 myhash
.update(str(buffersize
));
536 myhash
.update(str(exec_time_start
));
537 if(sys
.version
[0]>="3"):
538 myhash
.update(httpurl
.encode('utf-8'));
539 myhash
.update(str(buffersize
).encode('utf-8'));
540 myhash
.update(str(exec_time_start
).encode('utf-8'));
541 newtmpfilesuffix
= tmpfilesuffix
+ str(myhash
.hexdigest());
543 sleep
= geturls_download_sleep
;
544 urlparts
= urlparse
.urlparse(httpurl
);
545 if(urlparts
.username
is not None or urlparts
.password
is not None):
546 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
547 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
548 geturls_opener
= urllib2
.build_opener(urllib2
.HTTPCookieProcessor(httpcookie
));
549 if(isinstance(httpheaders
, dict)):
550 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
551 geturls_opener
.addheaders
= httpheaders
;
553 geturls_text
= geturls_opener
.open(httpurl
);
554 downloadsize
= geturls_text
.info().get('Content-Length');
555 if(downloadsize
is not None):
556 downloadsize
= int(downloadsize
);
557 if downloadsize
is None: downloadsize
= 0;
560 log
.info("Downloading URL "+httpurl
);
561 with tempfile
.NamedTemporaryFile('wb+', prefix
=tmpfileprefix
, suffix
=newtmpfilesuffix
, delete
=False) as f
:
562 tmpfilename
= f
.name
;
563 returnval
= {'Type': "File", 'Filename': tmpfilename
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'Headers': dict(geturls_text
.info()), 'URL': geturls_text
.geturl(), 'Code': geturls_text
.getcode()};
565 databytes
= geturls_text
.read(buffersize
);
566 if not databytes
: break;
567 datasize
= len(databytes
);
568 fulldatasize
= datasize
+ fulldatasize
;
571 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
572 downloaddiff
= fulldatasize
- prevdownsize
;
573 log
.info("Downloading "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Downloaded "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
574 prevdownsize
= fulldatasize
;
577 geturls_text
.close();
578 exec_time_end
= time
.time();
579 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to download file.");
580 returnval
.update({'Filesize': os
.path
.getsize(tmpfilename
), 'DownloadTime': float(exec_time_start
- exec_time_end
), 'DownloadTimeReadable': hms_string(exec_time_start
- exec_time_end
)});
583 def download_from_url_to_file_with_urllib(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, postdata
=None, outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
584 global geturls_download_sleep
;
586 sleep
= geturls_download_sleep
;
587 if(not outfile
=="-"):
588 outpath
= outpath
.rstrip(os
.path
.sep
);
589 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
590 if(not os
.path
.exists(outpath
)):
591 os
.makedirs(outpath
);
592 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
594 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
596 pretmpfilename
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, postdata
, buffersize
[0], sleep
);
597 tmpfilename
= pretmpfilename
['Filename'];
598 downloadsize
= os
.path
.getsize(tmpfilename
);
600 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
601 exec_time_start
= time
.time();
602 shutil
.move(tmpfilename
, filepath
);
603 exec_time_end
= time
.time();
604 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
605 if(os
.path
.exists(tmpfilename
)):
606 os
.remove(tmpfilename
);
607 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
608 if(outfile
=="-" and sys
.version
[0]=="2"):
609 pretmpfilename
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, buffersize
[0], sleep
);
610 tmpfilename
= pretmpfilename
['Filename'];
611 downloadsize
= os
.path
.getsize(tmpfilename
);
614 exec_time_start
= time
.time();
615 with
open(tmpfilename
, 'rb') as ft
:
618 databytes
= ft
.read(buffersize
[1]);
619 if not databytes
: break;
620 datasize
= len(databytes
);
621 fulldatasize
= datasize
+ fulldatasize
;
624 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
625 downloaddiff
= fulldatasize
- prevdownsize
;
626 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
627 prevdownsize
= fulldatasize
;
630 fdata
= f
.getvalue();
633 os
.remove(tmpfilename
);
634 exec_time_end
= time
.time();
635 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
636 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
637 if(outfile
=="-" and sys
.version
[0]>="3"):
638 pretmpfilename
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, buffersize
[0], sleep
);
639 tmpfilename
= pretmpfilename
['Filename'];
640 downloadsize
= os
.path
.getsize(tmpfilename
);
643 exec_time_start
= time
.time();
644 with
open(tmpfilename
, 'rb') as ft
:
647 databytes
= ft
.read(buffersize
[1]);
648 if not databytes
: break;
649 datasize
= len(databytes
);
650 fulldatasize
= datasize
+ fulldatasize
;
653 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
654 downloaddiff
= fulldatasize
- prevdownsize
;
655 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
656 prevdownsize
= fulldatasize
;
659 fdata
= f
.getvalue();
662 os
.remove(tmpfilename
);
663 exec_time_end
= time
.time();
664 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
665 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl over http.client/httplib and return its body in memory.

    Returns a dict {'Type': "Content", 'Content', 'Headers', 'URL', 'Code'}
    on success, or False for an unsupported URL scheme.
    sleep < 0 means "use the module-level geturls_download_sleep".
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if urlparts.username is not None or urlparts.password is not None:
        # Inline URL credentials become an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnection(urlparts[1])
    elif urlparts[0] == "https":
        httpconn = HTTPSConnection(urlparts[1])
    else:
        # Fix: previously httpconn was left unbound for non-http(s) schemes.
        return False
    httpconn.request("GET", urlparts[2], headers=httpheaders)
    geturls_text = httpconn.getresponse()
    log.info("Downloading URL "+httpurl)
    # Build the response-header dict once instead of on every lookup.
    responseheaders = dict(geturls_text.getheaders())
    if responseheaders.get("Content-Encoding") in ("gzip", "deflate"):
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': responseheaders, 'URL': httpurl, 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl over http.client/httplib into a named temp file.

    Streams the response body in buffersize chunks into a
    NamedTemporaryFile (delete=False) and returns a dict
    {'Type': "File", 'Filename', 'Filesize', 'FilesizeAlt', 'Headers',
    'URL', 'Code', 'DownloadTime', 'DownloadTimeReadable'};
    False for an unsupported URL scheme.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Per-download unique temp-file suffix from url+buffersize+start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnection(urlparts[1])
    elif urlparts[0] == "https":
        httpconn = HTTPSConnection(urlparts[1])
    else:
        # Fix: httpconn was unbound for non-http(s) schemes.
        return False
    httpconn.request("GET", urlparts[2], headers=httpheaders)
    geturls_text = httpconn.getresponse()
    responseheaders = dict(geturls_text.getheaders())
    downloadsize = responseheaders.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': responseheaders, 'URL': httpurl, 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: servers without Content-Length leave downloadsize == 0.
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # Fix: duration is end - start (was negative start - end).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via httplib to a file (outfile) or to memory ("-").

    buffersize is [download_chunk, copy_chunk]. Returns a 'File' dict when
    writing to disk, a 'Content' dict when outfile == "-", and False when
    the destination path is unusable.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            # Destination directory is actually a file.
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            # Destination file is actually a directory.
            return False
        # Fix: was download_from_url_file_with_urllib — keep the httplib
        # backend consistent — and postdata was dropped, which shifted
        # buffersize[0] into the postdata parameter.
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # Fix: duration is end - start (was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # Merged the formerly duplicated py2/py3 branches; they differed
        # only in the in-memory buffer type.
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if sys.version[0] == "2":
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl over httplib2's timeout-aware connections.

    Same contract as download_from_url_with_httplib: returns a
    {'Type': "Content", ...} dict, or False for an unsupported scheme.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if urlparts.username is not None or urlparts.password is not None:
        # Inline URL credentials become an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif urlparts[0] == "https":
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        # Fix: httpconn was unbound for non-http(s) schemes.
        return False
    httpconn.request("GET", urlparts[2], headers=httpheaders)
    geturls_text = httpconn.getresponse()
    log.info("Downloading URL "+httpurl)
    # Build the response-header dict once instead of on every lookup.
    responseheaders = dict(geturls_text.getheaders())
    if responseheaders.get("Content-Encoding") in ("gzip", "deflate"):
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': responseheaders, 'URL': httpurl, 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if not havehttplib2:
    # httplib2 is unavailable: delegate to the plain-urllib implementation
    # with an identical signature.
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
        """Fallback: fetch httpurl with the urllib backend instead."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, postdata, sleep)
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl over httplib2's connections into a named temp file.

    Same contract as download_from_url_file_with_httplib.
    Fix: the postdata default was the undefined name `none`, which raised
    NameError as soon as this def was evaluated; it is now None.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Per-download unique temp-file suffix from url+buffersize+start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if urlparts[0] == "http":
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif urlparts[0] == "https":
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        # Fix: httpconn was unbound for non-http(s) schemes.
        return False
    httpconn.request("GET", urlparts[2], headers=httpheaders)
    geturls_text = httpconn.getresponse()
    responseheaders = dict(geturls_text.getheaders())
    downloadsize = responseheaders.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': responseheaders, 'URL': httpurl, 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: servers without Content-Length leave downloadsize == 0.
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # Fix: duration is end - start (was negative start - end).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not havehttplib2:
    # httplib2 is unavailable: delegate to the plain-urllib implementation
    # with an identical signature.
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
        """Fallback: download httpurl to a temp file with the urllib backend."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, sleep)
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via httplib2 to a file (outfile) or to memory ("-").

    buffersize is [download_chunk, copy_chunk]. Returns a 'File' dict when
    writing to disk, a 'Content' dict when outfile == "-", and False when
    the destination path is unusable.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            # Destination directory is actually a file.
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            # Destination file is actually a directory.
            return False
        # Fix: this function mixed three backends (httplib, urllib2, urllib);
        # it now consistently uses the httplib2-backed helper, and postdata
        # is forwarded instead of being silently dropped.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # Fix: duration is end - start (was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # Merged the formerly duplicated py2/py3 branches; they differed
        # only in the in-memory buffer type.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if sys.version[0] == "2":
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if not havehttplib2:
    # httplib2 is unavailable: delegate to the plain-urllib implementation
    # with an identical signature.
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback: download httpurl to outfile with the urllib backend.

        Fix: the delegate was called with buffersize before outfile/outpath,
        which bound the buffer list to the outfile parameter; arguments are
        now passed in the delegate's declared order.
        """
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, outfile, outpath, buffersize, sleep)
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl with urllib's Request/urlopen and return its body.

    Returns a dict {'Type': "Content", 'Content', 'Headers', 'URL', 'Code'}.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if urlparts.username is not None or urlparts.password is not None:
        # Inline URL credentials become an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    # Fix: urllib.request.install_opener breaks the Python 2 path (py2's
    # urllib module has no `request` attribute); install_opener is imported
    # at the top of the file for both major versions.
    install_opener(geturls_opener)
    time.sleep(sleep)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    geturls_text = urlopen(Request(httpurl, headers=httpheaders))
    log.info("Downloading URL "+httpurl)
    if geturls_text.headers.get("Content-Encoding") in ("gzip", "deflate"):
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
    geturls_text.close()
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with urllib's Request/urlopen into a named temp file.

    Same return contract as download_from_url_file_with_httplib.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Per-download unique temp-file suffix from url+buffersize+start time.
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    # Fix: urllib.request.install_opener breaks the Python 2 path; use the
    # install_opener name imported at the top of the file for both versions.
    install_opener(geturls_opener)
    time.sleep(sleep)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    geturls_text = urlopen(Request(httpurl, headers=httpheaders))
    downloadsize = geturls_text.headers.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: servers without Content-Length leave downloadsize == 0.
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # Fix: duration is end - start (was negative start - end).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via Request/urlopen to a file (outfile) or memory ("-").

    buffersize is [download_chunk, copy_chunk]. Returns a 'File' dict when
    writing to disk, a 'Content' dict when outfile == "-", and False when
    the destination path is unusable.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            # Destination directory is actually a file.
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            # Destination file is actually a directory.
            return False
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # Fix: duration is end - start (was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # Merged the formerly duplicated py2/py3 branches. Fix: postdata was
        # dropped from these calls, shifting buffersize[0] into the postdata
        # parameter of the helper.
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if sys.version[0] == "2":
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl with the requests library and return the body in memory.

    Parameters:
      httpurl     -- URL to fetch; inline user:pass is converted to a Basic
                     Authorization header.
      httpheaders -- dict (or list convertible via
                     make_http_headers_from_list_to_dict) of request headers.
      httpcookie  -- cookie jar passed to requests.
      postdata    -- accepted for API symmetry with sibling functions; not
                     sent (this issues a GET).
      sleep       -- pre-request delay in seconds; <0 means use the module
                     default geturls_download_sleep.

    Returns a dict: {'Type': "Content", 'Content': <body bytes>,
    'Headers': dict, 'URL': final URL, 'Code': HTTP status code}.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    log.info("Downloading URL "+httpurl)
    # NOTE(review): this tests Content-Type, not Content-Encoding, for
    # "gzip"/"deflate" (requests normally decompresses transparently) —
    # kept as-is to preserve behavior; verify against real responses.
    if(geturls_text.headers.get('Content-Type')=="gzip" or geturls_text.headers.get('Content-Type')=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content)
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        # BUG FIX: GzipFile has no .content attribute; read() yields the
        # decompressed payload.
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.headers.get('Content-Type')!="gzip" and geturls_text.headers.get('Content-Type')!="deflate"):
        returnval_content = geturls_text.content[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
        """Fallback used when the requests module is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, postdata, sleep)
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, postdata=None, buffersize=524288, sleep=-1):
    """Stream httpurl into a uniquely-named temporary file using requests.

    The temp file name embeds a SHA-1 of (url, buffersize, start time) so
    concurrent downloads do not collide. Progress is logged per chunk.

    Returns a dict: {'Type': "File", 'Filename': <temp path>,
    'Filesize': bytes written, 'FilesizeAlt': readable sizes,
    'Headers', 'URL', 'Code', 'DownloadTime', 'DownloadTimeReadable'}.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
    # BUG FIX: Content-Length may be absent (chunked responses); the old
    # int(headers.get(...)) raised TypeError on None before the None check
    # could run. Fetch first, convert only when present, default to 0.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: percentage is only meaningful when the server sent a size.
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when the requests module is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via requests, then either move it to
    outpath/outfile (Type "File") or, when outfile == "-", copy the temp
    file into memory and return its bytes (Type "Content").

    buffersize is [download chunk size, copy chunk size].
    Returns False when outpath is an existing file or the target path is
    an existing directory.
    """
    # BUG FIX: the parameter default was the undefined name `none`
    # (lowercase), which raised NameError at import time; it is now None.
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        # BUG FIX: postdata is passed explicitly; the old call let
        # buffersize[0] fall into the postdata slot.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # The former duplicated py2/py3 branches differed only in the
        # in-memory buffer type; merged with a version check on the buffer.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests module is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl with urllib3 (PoolManager.request) and return the
    body in memory.

    Gzip/deflate responses (per Content-Encoding) are decompressed before
    being returned. postdata is accepted for API symmetry; this issues a
    GET. Returns {'Type': "Content", 'Content', 'Headers', 'URL', 'Code'}.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    log.info("Downloading URL "+httpurl)
    if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
        """Fallback used when urllib3 is unavailable: delegate to urllib.

        BUG FIX: the old call omitted postdata, so sleep was passed in the
        postdata slot; arguments are now forwarded positionally intact
        (matching the identical fallback defined later in this file).
        """
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, postdata, sleep)
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
    """Stream httpurl into a uniquely-named temporary file using urllib3.

    The temp file name embeds a SHA-1 of (url, buffersize, start time).
    Returns {'Type': "File", 'Filename', 'Filesize', 'FilesizeAlt',
    'Headers', 'URL', 'Code', 'DownloadTime', 'DownloadTimeReadable'}.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    # BUG FIX: Content-Length may be absent; the old int(headers.get(...))
    # raised TypeError on None before the None check could run.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when urllib3 is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via urllib3, then either move it to
    outpath/outfile (Type "File") or, when outfile == "-", copy the temp
    file into memory and return its bytes (Type "Content").

    buffersize is [download chunk size, copy chunk size].
    Returns False when outpath is an existing file or the target path is
    an existing directory.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        # CONSISTENCY FIX: all branches now call the _request3 helper
        # (the old code mixed in download_from_url_file_with_urllib3),
        # and postdata no longer absorbs buffersize[0].
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when urllib3 is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl with urllib3 (PoolManager.urlopen) and return the
    body in memory.

    Gzip/deflate responses (per Content-Encoding) are decompressed before
    being returned. postdata is accepted for API symmetry; this issues a
    GET. Returns {'Type': "Content", 'Content', 'Headers', 'URL', 'Code'}.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    log.info("Downloading URL "+httpurl)
    if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
        """Fallback used when urllib3 is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, postdata, sleep)
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
    """Stream httpurl into a uniquely-named temporary file using urllib3
    (PoolManager.urlopen).

    The temp file name embeds a SHA-1 of (url, buffersize, start time).
    Returns {'Type': "File", 'Filename', 'Filesize', 'FilesizeAlt',
    'Headers', 'URL', 'Code', 'DownloadTime', 'DownloadTimeReadable'}.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    # BUG FIX: Content-Length may be absent; the old int(headers.get(...))
    # raised TypeError on None before the None check could run.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when urllib3 is unavailable:
        delegate to the urllib implementation and return its result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via urllib3, then either move it to
    outpath/outfile (Type "File") or, when outfile == "-", copy the temp
    file into memory and return its bytes (Type "Content").

    buffersize is [download chunk size, copy chunk size].
    Returns False when outpath is an existing file or the target path is
    an existing directory.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # BUG FIX: the old py3 branch wrote
        #   download_from_url_file_with_urllib3(..., postdata=None, buffersize[0], sleep)
        # — a positional-argument-after-keyword SyntaxError — and the py2
        # branch let buffersize[0] fall into the postdata slot. Arguments
        # are now forwarded positionally intact.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when urllib3 is unavailable: delegate to the urllib implementation.

        Same contract as the urllib3 variant: returns the result dict from
        download_from_url_to_file_with_urllib (or False on failure).
        """
        # BUGFIX: the old positional call passed buffersize into the callee's
        # outfile slot (the to-file variants take postdata, outfile, outpath,
        # buffersize, sleep in that order).  Keywords make the mapping explicit.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, postdata=postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
    """Fetch httpurl with a mechanize Browser and return the response body in memory.

    Returns a dict: {'Type': "Content", 'Content': body, 'Headers': dict,
    'URL': final URL after redirects, 'Code': HTTP status}.  Gzip/deflate
    encoded responses are decompressed before being returned.
    NOTE(review): postdata is accepted but never submitted with the request
    in this implementation — confirm against callers.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # BUGFIX: coerce a missing half of the credentials to "" so a URL with
        # only a username (or only a password) cannot raise a TypeError.
        inurlencode = b64encode(str((urlparts.username or "")+":"+(urlparts.password or "")).encode()).decode("UTF-8")
        # BUGFIX: copy before mutating so the shared default header dict does
        # not permanently accumulate an Authorization entry across calls.
        httpheaders = dict(httpheaders)
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    # mechanize wants a list of (name, value) pairs, not a dict.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    geturls_text = geturls_opener.open(httpurl)
    log.info("Downloading URL "+httpurl)
    if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
        # Buffer the compressed payload, then decompress it transparently.
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, sleep=-1):
        """Fallback used when mechanize is unavailable: delegate to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, postdata, sleep)
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with mechanize into a uniquely named temporary file.

    Returns a dict describing the temp file ('Type': "File", 'Filename',
    'Filesize', 'FilesizeAlt', 'Headers', 'URL', 'Code', plus download
    timings).  The caller is responsible for removing the temporary file.
    NOTE(review): postdata is accepted but never submitted with the request
    in this implementation — confirm against callers.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # Coerce missing credential halves to "" to avoid TypeError; copy the
        # headers so the shared default dict is never mutated.
        inurlencode = b64encode(str((urlparts.username or "")+":"+(urlparts.password or "")).encode()).decode("UTF-8")
        httpheaders = dict(httpheaders)
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    geturls_text = geturls_opener.open(httpurl)
    # BUGFIX: int() was applied before the None check, so a response without a
    # Content-Length header raised TypeError instead of falling back to 0.
    downloadsize = geturls_text.info().get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize > 0):
                # Progress percentage only makes sense with a known total size
                # (also prevents division by zero when Content-Length is absent).
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when mechanize is unavailable: delegate to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, postdata, buffersize, sleep)
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with mechanize to a file on disk, or to memory.

    If outfile is a name, the payload is fetched to a temp file and moved to
    outpath/outfile; the return dict has 'Type': "File".  If outfile is "-",
    the payload is copied into memory and returned under 'Type': "Content".
    Returns False when outpath is an existing file or the target is a
    directory.  buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # --- download to a temp file, then move it into place ---
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # --- download to a temp file, then copy it into an in-memory buffer ---
        # BUGFIX: the old per-version branches mangled the argument list (the
        # py2 branch dropped postdata, shifting buffersize[0] into its slot;
        # the py3 branch mixed keyword and positional arguments, which is a
        # SyntaxError, and discarded the caller's headers/cookies).  One
        # correct call serves both versions; only the buffer class differs.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, postdata, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize > 0):
                    # Progress only when the total is known (avoids div-by-zero).
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when mechanize is unavailable: delegate to the urllib implementation."""
        # BUGFIX: the old positional call passed buffersize into the callee's
        # outfile slot (the to-file variants take postdata, outfile, outpath,
        # buffersize, sleep in that order).  Keywords make the mapping explicit.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, postdata=postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_file_from_ftp_file(url):
    """Download the path of an ftp:// or ftps:// URL into a BytesIO buffer.

    Returns a BytesIO positioned at offset 0, or False when the URL scheme is
    neither "ftp" nor "ftps".  Anonymous login is used when the URL carries no
    credentials.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    # BUGFIX: ftplib cannot take None for the port; default to 21.
    ftp.connect(urlparts.hostname, urlparts.port if urlparts.port is not None else 21)
    # BUGFIX: login previously used the raw URL credentials, which made the
    # anonymous fallback computed above dead code and raised TypeError for
    # credential-less URLs (ftplib requires string arguments).
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme=="ftps"):
        # Switch the data connection to TLS as well.
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Download an ftp:///ftps:// URL and return its contents as bytes.

    Returns False when the underlying download fails (e.g. unsupported scheme).
    """
    ftpfile = download_file_from_ftp_file(url)
    # BUGFIX: the downloader returns False on failure; calling .read() on it
    # raised AttributeError instead of propagating the failure.
    if(ftpfile is False):
        return False
    return ftpfile.read()
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file-like object ftpfile to the path of an ftp:///ftps:// URL.

    Returns the (rewound) ftpfile on success, or False when the URL scheme is
    neither "ftp" nor "ftps".  Anonymous login is used when the URL carries no
    credentials.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    # BUGFIX: ftplib cannot take None for the port; default to 21.
    ftp.connect(urlparts.hostname, urlparts.port if urlparts.port is not None else 21)
    # BUGFIX: login previously used the raw URL credentials, which made the
    # anonymous fallback computed above dead code and raised TypeError for
    # credential-less URLs (ftplib requires string arguments).
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme=="ftps"):
        # Switch the data connection to TLS as well.
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string ftpstring to the path of an ftp:///ftps:// URL."""
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Download the path of an sftp:// URL into a BytesIO buffer via paramiko.

    Returns a BytesIO positioned at offset 0; False on a non-sftp scheme or
    when the SSH connection fails.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    # SECURITY: AutoAddPolicy blindly trusts unknown host keys, allowing
    # man-in-the-middle attacks; consider RejectPolicy plus a known_hosts file.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # BUGFIX: connect previously used the raw URL credentials, which made
        # the anonymous fallback computed above dead code.
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_sftp_file(url):
    """Fallback when paramiko is unavailable: SFTP downloads are unsupported."""
    return False
def download_file_from_sftp_string(url):
    """Download an sftp:// URL and return its contents as bytes.

    Returns False when the underlying download fails (bad scheme, SSH error).
    """
    sftpfile = download_file_from_sftp_file(url)
    # BUGFIX: the downloader returns False on failure; calling .read() on it
    # raised AttributeError instead of propagating the failure.
    if(sftpfile is False):
        return False
    return sftpfile.read()
def download_file_from_sftp_string(url):
    """Fallback when paramiko is unavailable: SFTP downloads are unsupported.

    BUGFIX: this stub was previously named download_file_from_ftp_string,
    which clobbered the working FTP string downloader defined earlier whenever
    paramiko was missing.  The intended name, matching the sibling stubs, is
    download_file_from_sftp_string.
    """
    return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file-like object sftpfile to the path of an sftp:// URL via paramiko.

    Returns the (rewound) sftpfile on success; False on a non-sftp scheme or
    when the SSH connection fails.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.port is None):
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    # SECURITY: AutoAddPolicy blindly trusts unknown host keys, allowing
    # man-in-the-middle attacks; consider RejectPolicy plus a known_hosts file.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # BUGFIX: connect previously used the raw URL credentials, which made
        # the anonymous fallback computed above dead code.
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback when paramiko is unavailable: SFTP uploads are unsupported."""
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the byte string sftpstring to the path of an sftp:// URL."""
    sftpfileo = BytesIO(sftpstring)
    # BUGFIX: this called the nonexistent upload_file_to_sftp_files (trailing
    # "s" typo) and passed the undefined name ftpfileo instead of sftpfileo,
    # so every call raised NameError.  It also discarded the result.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
2109 def upload_file_to_sftp_string(url
):