4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/15/2023 Ver. 0.9.4 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
49 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
59 if(sys
.version
[0]=="2"):
61 from cStringIO
import StringIO
;
63 from StringIO
import StringIO
;
64 # From http://python-future.org/compatible_idioms.html
65 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
66 from urllib
import urlencode
;
67 from urllib
import urlopen
as urlopenalt
;
68 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
69 import urlparse
, cookielib
;
70 from httplib
import HTTPConnection
, HTTPSConnection
;
71 if(sys
.version
[0]>="3"):
72 from io
import StringIO
, BytesIO
;
73 # From http://python-future.org/compatible_idioms.html
74 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
75 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
76 from urllib
.error
import HTTPError
, URLError
;
77 import urllib
.parse
as urlparse
;
78 import http
.cookiejar
as cookielib
;
79 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Project identity and version metadata.
__program_name__ = "PyWWW-Get"
__program_alt_name__ = "PyWWWGet"
__program_small_name__ = "wwwget"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, micro, pre-release tag or None, pre-release number or None)
__version_info__ = (0, 9, 4, "RC 1", 1)
# (year, month, day, pre-release tag or None, pre-release number or None)
__version_date_info__ = (2023, 9, 15, "RC 1", 1)
# "YYYY.MM.DD" with zero-padded month and day.
__version_date__ = str(__version_date_info__[0]) + "." + str(__version_date_info__[1]).zfill(2) + "." + str(__version_date_info__[2]).zfill(2)
__revision__ = __version_info__[3]
__revision_id__ = "$Id$"
# Date string with the pre-release number appended when one is set.
if(__version_info__[4] is None):
    __version_date_plusrc__ = __version_date__
else:
    __version_date_plusrc__ = __version_date__ + "-" + str(__version_date_info__[4])
# Human-readable version: "major.minor.micro" plus the pre-release tag if any.
if(__version_info__[3] is None):
    __version__ = str(__version_info__[0]) + "." + str(__version_info__[1]) + "." + str(__version_info__[2])
else:
    __version__ = str(__version_info__[0]) + "." + str(__version_info__[1]) + "." + str(__version_info__[2]) + " " + str(__version_info__[3])
# Prefix for temporary download files, e.g. "py3wwwget0-" on Python 3.
tmpfileprefix = "py"+str(sys.version_info[0])+__program_small_name__+str(__version_info__[0])+"-";
# System temporary directory used for scratch files.
pytempdir = tempfile.gettempdir();
104 compression_supported
= "gzip, deflate";
106 compression_supported
= "gzip, deflate, br";
108 compression_supported
= "gzip, deflate";
# Shared cookie jar used as the default cookie store for downloads.
geturls_cj = cookielib.CookieJar();
# Client-hint header fragments for each Windows version.
# BUG FIX: the original dicts listed 'SEC-CH-UA-PLATFORM' twice, so the
# "Windows" value was silently overwritten by the version string.  The
# version belongs in the separate 'SEC-CH-UA-PLATFORM-VERSION' client-hint
# header, used here instead.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# NOTE(review): version "5.1.0" looks like a copy-paste from the XP entry
# (the UA string says NT 5.2) -- kept as-is pending confirmation.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Per-browser User-Agent strings, all advertising the Windows 7 platform token.
geturls_ua_firefox_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:109.0) Gecko/20100101 Firefox/117.0";
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17";
geturls_ua_chrome_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36";
geturls_ua_chromium_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36";
geturls_ua_palemoon_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1";
geturls_ua_opera_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0";
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48";
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 ("+windows7_ua_string+"; Trident/7.0; rv:11.0) like Gecko";
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 ("+windows7_ua_string+") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31";
# This tool's own User-Agent, advertising project name, version and URL.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Name of the running Python implementation; fall back to plain "Python"
# when the platform module reports an empty string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
else:
    py_implementation = "Python";
# Alternate self-identifying User-Agent including OS, architecture and
# interpreter details.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default User-Agent used for downloads.
geturls_ua = geturls_ua_firefox_windows7;
# Default HTTP request header sets, one per impersonated browser.  The
# Chromium-family sets also carry client-hint (SEC-CH-UA-*) headers and are
# merged with the Windows 7 platform hints via dict.update().
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
# Self-identifying header sets.  BUG FIX: these two dicts listed the
# 'SEC-CH-UA-PLATFORM' key twice, so the implementation name was silently
# overwritten by the version string; the version is now sent as
# 'SEC-CH-UA-PLATFORM-VERSION' instead.
# NOTE(review): using the project version as the platform version is odd but
# matches the original's apparent intent -- confirm upstream.
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': py_implementation, 'SEC-CH-UA-ARCH': platform.machine(), 'SEC-CH-UA-PLATFORM-VERSION': str(__version__)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Default header set and default inter-download sleep (seconds).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
168 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
169 if(outtype
=="print" and dbgenable
):
172 elif(outtype
=="log" and dbgenable
):
173 logging
.info(dbgtxt
);
175 elif(outtype
=="warning" and dbgenable
):
176 logging
.warning(dbgtxt
);
178 elif(outtype
=="error" and dbgenable
):
179 logging
.error(dbgtxt
);
181 elif(outtype
=="critical" and dbgenable
):
182 logging
.critical(dbgtxt
);
184 elif(outtype
=="exception" and dbgenable
):
185 logging
.exception(dbgtxt
);
187 elif(outtype
=="logalt" and dbgenable
):
188 logging
.log(dgblevel
, dbgtxt
);
190 elif(outtype
=="debug" and dbgenable
):
191 logging
.debug(dbgtxt
);
def verbose_printout_return(dbgtxt, outtype="log", dbgenable=True, dgblevel=20):
    """Emit *dbgtxt* through verbose_printout with the same arguments.

    NOTE(review): the name suggests this should also return dbgtxt; the
    trailing return statement appears to have been lost from this extraction
    -- confirm against the upstream source.
    """
    verbose_printout(dbgtxt, outtype, dbgenable, dgblevel);
203 def add_url_param(url
, **params
):
205 parts
= list(urlparse
.urlsplit(url
));
206 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
208 parts
[n
]=urlencode(d
);
209 return urlparse
.urlunsplit(parts
);
# Extend PATH with this script's directory and the current working directory
# so which_exec() below can also locate executables shipped alongside the
# script or present in the invocation directory.
os.environ["PATH"] = os.environ["PATH"] + os.pathsep + os.path.dirname(os.path.realpath(__file__)) + os.pathsep + os.getcwd();
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None when the file is not found on the search path.
    """
    # BUG FIX: split on os.pathsep (":" on POSIX, ";" on Windows) instead of
    # a hard-coded ":", matching how PATH is extended with os.pathsep above,
    # and join with os.path.join instead of a hard-coded "/" separator.
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
217 def listize(varlist
):
225 newlistreg
.update({ilx
: varlist
[il
]});
226 newlistrev
.update({varlist
[il
]: ilx
});
229 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
232 def twolistize(varlist
):
242 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
243 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
244 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
245 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
248 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
249 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
250 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
253 def arglistize(proexec
, *varlist
):
257 newarglist
= [proexec
];
259 if varlist
[il
][0] is not None:
260 newarglist
.append(varlist
[il
][0]);
261 if varlist
[il
][1] is not None:
262 newarglist
.append(varlist
[il
][1]);
266 # hms_string by ArcGIS Python Recipes
267 # https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as "H:MM:SS.ss"."""
    full_hours = int(sec_elapsed / (60 * 60))
    full_minutes = int((sec_elapsed % (60 * 60)) / 60)
    leftover_seconds = sec_elapsed % 60.0
    # Minutes are zero-padded to 2 digits; seconds to 5 characters with two
    # decimal places (e.g. "05.25").
    return "{}:{:>02}:{:>05.2f}".format(full_hours, full_minutes, leftover_seconds)
274 # get_readable_size by Lipis
275 # http://stackoverflow.com/posts/14998888/revisions
276 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
278 if(unit
!="IEC" and unit
!="SI"):
281 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
282 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
285 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
286 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
291 if abs(bytes
) < unitsize
:
292 strformat
= "%3."+str(precision
)+"f%s";
293 pre_return_val
= (strformat
% (bytes
, unit
));
294 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
295 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
296 alt_return_val
= pre_return_val
.split();
297 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
300 strformat
= "%."+str(precision
)+"f%s";
301 pre_return_val
= (strformat
% (bytes
, "YiB"));
302 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
303 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
304 alt_return_val
= pre_return_val
.split();
305 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
308 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
310 usehashtypes
= usehashtypes
.lower();
311 getfilesize
= os
.path
.getsize(infile
);
312 return_val
= get_readable_size(getfilesize
, precision
, unit
);
314 hashtypelist
= usehashtypes
.split(",");
315 openfile
= open(infile
, "rb");
316 filecontents
= openfile
.read();
319 listnumend
= len(hashtypelist
);
320 while(listnumcount
< listnumend
):
321 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
322 hashtypelistup
= hashtypelistlow
.upper();
323 filehash
= hashlib
.new(hashtypelistup
);
324 filehash
.update(filecontents
);
325 filegethash
= filehash
.hexdigest();
326 return_val
.update({hashtypelistup
: filegethash
});
330 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
332 usehashtypes
= usehashtypes
.lower();
333 getfilesize
= len(instring
);
334 return_val
= get_readable_size(getfilesize
, precision
, unit
);
336 hashtypelist
= usehashtypes
.split(",");
338 listnumend
= len(hashtypelist
);
339 while(listnumcount
< listnumend
):
340 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
341 hashtypelistup
= hashtypelistlow
.upper();
342 filehash
= hashlib
.new(hashtypelistup
);
343 if(sys
.version
[0]=="2"):
344 filehash
.update(instring
);
345 if(sys
.version
[0]>="3"):
346 filehash
.update(instring
.encode('utf-8'));
347 filegethash
= filehash
.hexdigest();
348 return_val
.update({hashtypelistup
: filegethash
});
352 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
353 if isinstance(headers
, dict):
355 if(sys
.version
[0]=="2"):
356 for headkey
, headvalue
in headers
.iteritems():
357 returnval
.append((headkey
, headvalue
));
358 if(sys
.version
[0]>="3"):
359 for headkey
, headvalue
in headers
.items():
360 returnval
.append((headkey
, headvalue
));
361 elif isinstance(headers
, list):
367 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
368 if isinstance(headers
, dict):
370 if(sys
.version
[0]=="2"):
371 for headkey
, headvalue
in headers
.iteritems():
372 returnval
.append(headkey
+": "+headvalue
);
373 if(sys
.version
[0]>="3"):
374 for headkey
, headvalue
in headers
.items():
375 returnval
.append(headkey
+": "+headvalue
);
376 elif isinstance(headers
, list):
382 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
383 if isinstance(headers
, list):
388 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
390 elif isinstance(headers
, dict):
396 def get_httplib_support(checkvalue
=None):
397 global haverequests
, havemechanize
;
399 returnval
.append("httplib");
401 returnval
.append("httplib2");
402 returnval
.append("urllib");
404 returnval
.append("urllib3");
405 returnval
.append("request3");
406 returnval
.append("request");
408 returnval
.append("requests");
410 returnval
.append("mechanize");
411 if(not checkvalue
is None):
412 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
413 checkvalue
= "urllib";
414 if(checkvalue
=="httplib1"):
415 checkvalue
= "httplib";
416 if(checkvalue
in returnval
):
422 def check_httplib_support(checkvalue
="urllib"):
423 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
424 checkvalue
= "urllib";
425 if(checkvalue
=="httplib1"):
426 checkvalue
= "httplib";
427 returnval
= get_httplib_support(checkvalue
);
430 def get_httplib_support_list():
431 returnval
= get_httplib_support(None);
434 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
435 global geturls_download_sleep
, haverequests
, havemechanize
;
437 sleep
= geturls_download_sleep
;
438 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
439 httplibuse
= "urllib";
440 if(httplibuse
=="httplib1"):
441 httplibuse
= "httplib";
442 if(not haverequests
and httplibuse
=="requests"):
443 httplibuse
= "urllib";
444 if(not havemechanize
and httplibuse
=="mechanize"):
445 httplibuse
= "urllib";
446 if(not havehttplib2
and httplibuse
=="httplib2"):
447 httplibuse
= "httplib";
448 if(httplibuse
=="urllib"):
449 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
450 elif(httplibuse
=="request"):
451 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
452 elif(httplibuse
=="request3"):
453 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
454 elif(httplibuse
=="httplib"):
455 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
456 elif(httplibuse
=="httplib2"):
457 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
458 elif(httplibuse
=="urllib3"):
459 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
460 elif(httplibuse
=="requests"):
461 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
462 elif(httplibuse
=="mechanize"):
463 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
468 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1):
469 global geturls_download_sleep
, haverequests
, havemechanize
;
471 sleep
= geturls_download_sleep
;
472 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
473 httplibuse
= "urllib";
474 if(httplibuse
=="httplib1"):
475 httplibuse
= "httplib";
476 if(not haverequests
and httplibuse
=="requests"):
477 httplibuse
= "urllib";
478 if(not havemechanize
and httplibuse
=="mechanize"):
479 httplibuse
= "urllib";
480 if(not havehttplib2
and httplibuse
=="httplib2"):
481 httplibuse
= "httplib";
482 if(httplibuse
=="urllib"):
483 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
484 elif(httplibuse
=="request"):
485 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
486 elif(httplibuse
=="request3"):
487 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
488 elif(httplibuse
=="httplib"):
489 returnval
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
490 elif(httplibuse
=="httplib2"):
491 returnval
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
492 elif(httplibuse
=="urllib3"):
493 returnval
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
494 elif(httplibuse
=="requests"):
495 returnval
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
496 elif(httplibuse
=="mechanize"):
497 returnval
= download_from_url_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
502 def download_from_url_to_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
503 global geturls_download_sleep
, haverequests
, havemechanize
;
505 sleep
= geturls_download_sleep
;
506 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
507 httplibuse
= "urllib";
508 if(httplibuse
=="httplib1"):
509 httplibuse
= "httplib";
510 if(not haverequests
and httplibuse
=="requests"):
511 httplibuse
= "urllib";
512 if(not havemechanize
and httplibuse
=="mechanize"):
513 httplibuse
= "urllib";
514 if(not havehttplib2
and httplibuse
=="httplib2"):
515 httplibuse
= "httplib";
516 if(httplibuse
=="urllib"):
517 returnval
= download_from_url_to_file_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
518 elif(httplibuse
=="request"):
519 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
520 elif(httplibuse
=="request3"):
521 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
522 elif(httplibuse
=="httplib"):
523 returnval
= download_from_url_to_file_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
524 elif(httplibuse
=="httplib2"):
525 returnval
= download_from_url_to_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
526 elif(httplibuse
=="urllib3"):
527 returnval
= download_from_url_to_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
528 elif(httplibuse
=="requests"):
529 returnval
= download_from_url_to_file_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
530 elif(httplibuse
=="mechanize"):
531 returnval
= download_from_url_to_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
536 def download_from_url_with_urllib(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, sleep
=-1):
537 global geturls_download_sleep
, havebrotli
;
539 sleep
= geturls_download_sleep
;
540 urlparts
= urlparse
.urlparse(httpurl
);
541 if(urlparts
.username
is not None or urlparts
.password
is not None):
542 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
543 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
544 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
545 if(isinstance(httpheaders
, dict)):
546 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
547 geturls_opener
.addheaders
= httpheaders
;
549 if(postdata
is not None and not isinstance(postdata
, dict)):
550 postdata
= urlencode(postdata
);
552 if(httpmethod
=="GET"):
553 geturls_text
= geturls_opener
.open(httpurl
);
554 elif(httpmethod
=="POST"):
555 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
557 geturls_text
= geturls_opener
.open(httpurl
);
558 except HTTPError
as geturls_text_error
:
559 geturls_text
= geturls_text_error
;
560 log
.info("Error With URL "+httpurl
);
562 log
.info("Error With URL "+httpurl
);
564 except socket
.timeout
:
565 log
.info("Error With URL "+httpurl
);
567 log
.info("Downloading URL "+httpurl
);
568 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
569 if(sys
.version
[0]=="2"):
570 strbuf
= StringIO(geturls_text
.read());
571 if(sys
.version
[0]>="3"):
572 strbuf
= BytesIO(geturls_text
.read());
573 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
574 returnval_content
= gzstrbuf
.read()[:];
575 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
576 returnval_content
= geturls_text
.read()[:];
577 if(geturls_text
.info().get("Content-Encoding")=="br" and havebrotli
):
578 returnval_content
= brotli
.decompress(returnval_content
);
579 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': dict(geturls_text
.info()), 'URL': geturls_text
.geturl(), 'Code': geturls_text
.getcode()};
580 geturls_text
.close();
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download *httpurl* with urllib into a uniquely named temporary file.

    Parameters mirror the other download_* helpers: request headers/cookies,
    HTTP method ("GET"/"POST"), optional POST body, read chunk size, and a
    pre-request sleep (negative means "use geturls_download_sleep").

    Returns a dict with 'Type', 'Filename', 'Filesize', 'FilesizeAlt',
    'Headers', 'URL', 'Code', 'DownloadTime' and 'DownloadTimeReadable',
    or False when the request fails outright.

    NOTE(review): reconstructed from a mangled source; missing control-flow
    lines (try/while guards) were restored from the file's repeating pattern —
    verify against upstream.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Per-download SHA-1 of (url, buffersize, start time) so concurrent
    # downloads never collide on the temp-file name.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic-auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    try:
        if(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            # GET and any unrecognized method fall back to a plain open.
            geturls_text = geturls_opener.open(httpurl)
    except HTTPError as geturls_text_error:
        # An HTTP error response is still a readable response object.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    downloadsize = geturls_text.info().get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: duration is end - start; the original subtracted in the wrong
    # order and reported negative download times.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* with urllib to a named file, or into memory.

    When *outfile* is "-" the content is returned in the result dict
    ('Type': "Content"); otherwise the download is moved to
    outpath/outfile ('Type': "File").  buffersize is a [download, copy]
    pair of chunk sizes.  Returns False on failure.

    NOTE(review): reconstructed from a mangled source; the py2/py3 in-memory
    branches (StringIO vs BytesIO) were merged — verify against upstream.
    """
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # The destination directory is actually a file: bail out.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # The destination file is actually a directory: bail out.
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations are end - start (original reported negatives).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the py3 branch lacked this failure guard (py2 had it).
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # py2 strings are bytes, so StringIO suffices there; py3 needs BytesIO.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch *httpurl* with the low-level httplib/http.client API.

    Returns a dict with 'Type': "Content", the (decompressed) body,
    response headers, the URL and the status code, or False on failure.
    """
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic-auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1])
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1])
    else:
        # Unsupported scheme for this backend.
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch issued a GET request with a body.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    log.info("Downloading URL "+httpurl)
    httpencoding = dict(geturls_text.getheaders()).get("Content-Encoding")
    if(httpencoding=="gzip" or httpencoding=="deflate"):
        # Wrap the raw body so GzipFile can decompress it in memory.
        strbuf = StringIO(geturls_text.read()) if sys.version[0]=="2" else BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    if(httpencoding=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.getheaders()), 'URL': httpurl, 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download *httpurl* with httplib/http.client into a temporary file.

    Same contract as download_from_url_file_with_urllib: returns a result
    dict ('Type': "File", temp 'Filename', sizes, headers, timing) or
    False on failure.

    NOTE(review): reconstructed from a mangled source; missing control-flow
    lines were restored from the file's repeating pattern — verify upstream.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Per-download SHA-1 keeps concurrent temp-file names unique.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1])
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch issued a GET request with a body.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    downloadsize = dict(geturls_text.getheaders()).get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.getheaders()), 'URL': httpurl, 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: duration is end - start (original reported negative times).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* with httplib to a named file, or into memory.

    When *outfile* is "-" the content is returned in the result dict
    ('Type': "Content"); otherwise the download is moved to
    outpath/outfile ('Type': "File").  Returns False on failure.

    NOTE(review): reconstructed from a mangled source; the py2/py3 in-memory
    branches were merged — verify against upstream.
    """
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations are end - start (original reported negatives).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the py3 branch lacked this failure guard (py2 had it).
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # py2 strings are bytes, so StringIO suffices there; py3 needs BytesIO.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch *httpurl* with httplib2's timeout-aware connection classes.

    Returns a dict with 'Type': "Content", the (decompressed) body,
    response headers, the URL and the status code, or False on failure.
    """
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic-auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch issued a GET request with a body.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    log.info("Downloading URL "+httpurl)
    httpencoding = dict(geturls_text.getheaders()).get("Content-Encoding")
    if(httpencoding=="gzip" or httpencoding=="deflate"):
        strbuf = StringIO(geturls_text.read()) if sys.version[0]=="2" else BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    if(httpencoding=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.getheaders()), 'URL': httpurl, 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        # Ensure the wrapper actually propagates the delegate's result.
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download *httpurl* with httplib2 into a temporary file.

    Same contract as download_from_url_file_with_urllib: returns a result
    dict ('Type': "File", temp 'Filename', sizes, headers, timing) or
    False on failure.

    NOTE(review): reconstructed from a mangled source; missing control-flow
    lines were restored from the file's repeating pattern — verify upstream.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Per-download SHA-1 keeps concurrent temp-file names unique.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1])
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1])
    else:
        return False
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            # BUG FIX: the POST branch issued a GET request with a body.
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders)
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders)
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    geturls_text = httpconn.getresponse()
    downloadsize = dict(geturls_text.getheaders()).get('Content-Length')
    downloadsize = int(downloadsize) if downloadsize is not None else 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.getheaders()), 'URL': httpurl, 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: duration is end - start (original reported negative times).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        # Ensure the wrapper actually propagates the delegate's result.
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* with httplib2 to a named file, or into memory.

    When *outfile* is "-" the content is returned in the result dict
    ('Type': "Content"); otherwise the download is moved to
    outpath/outfile ('Type': "File").  Returns False on failure.

    NOTE(review): reconstructed from a mangled source; the py2/py3 in-memory
    branches were merged — verify against upstream.
    """
    global geturls_download_sleep
    if(sleep<0):
        sleep = geturls_download_sleep
    returnval = False
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations are end - start (original reported negatives).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # BUG FIX: the py3 branch called download_from_url_file_with_urllib,
        # silently bypassing the httplib2 backend; use the httplib2 helper.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        # py2 strings are bytes, so StringIO suffices there; py3 needs BytesIO.
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                    downloaddiff = fulldatasize - prevdownsize
                    log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                    prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when httplib2 is unavailable: delegate to urllib.

        BUG FIX: the original passed the arguments positionally in the
        order (..., postdata, buffersize, outfile, outpath, sleep), but
        download_from_url_to_file_with_urllib is declared as
        (..., postdata, outfile, outpath, buffersize, sleep) — so the
        buffer list landed in the outfile slot.  Arguments are now
        forwarded in the correct order.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile, outpath, buffersize, sleep)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch *httpurl* via urllib's Request/urlopen API.

    Returns a dict with 'Type': "Content", the (decompressed) body,
    response headers, the final URL and the status code, or False on
    failure.

    NOTE(review): reconstructed from a mangled source; missing control-flow
    lines were restored from the file's repeating pattern — verify upstream.
    """
    global geturls_download_sleep, havebrotli
    if(sleep<0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic-auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    # BUG FIX: call the directly-imported install_opener(); the attribute
    # lookup urllib.request.install_opener fails on Python 2, where the
    # urllib.request submodule does not exist.
    install_opener(geturls_opener)
    time.sleep(sleep)
    # Request() wants a header mapping, not the opener's header list.
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl, headers=httpheaders)
        if(httpmethod=="POST"):
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            # GET and any unrecognized method fall back to a plain urlopen.
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        # An HTTP error response is still a readable response object.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    httpencoding = geturls_text.headers.get("Content-Encoding")
    if(httpencoding=="gzip" or httpencoding=="deflate"):
        strbuf = StringIO(geturls_text.read()) if sys.version[0]=="2" else BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    if(httpencoding=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
    geturls_text.close()
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download *httpurl* via urllib into a uniquely named temporary file.

    Returns a dict with 'Type' "File", 'Filename', 'Filesize', 'FilesizeAlt',
    'Headers', 'URL', 'Code', 'DownloadTime' and 'DownloadTimeReadable',
    or False on connection failure. The caller owns (and must delete) the
    temporary file."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash URL + buffer size + start time to make the temp-file name unique.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        # BUGFIX: copy before mutating so the shared default header dict
        # (geturls_headers) is not polluted across calls.
        httpheaders = dict(httpheaders)
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    urllib.request.install_opener(geturls_opener)
    time.sleep(sleep)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl, headers=httpheaders)
        if(httpmethod == "POST"):
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        # Keep the HTTPError response: it is file-like with headers/body.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard: Content-Length may be missing (downloadsize == 0).
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* (urllib backend) and either move the result to
    outpath/outfile, or - when outfile == "-" - return the bytes in memory.

    Returns a result dict, or False when the destination is unusable or the
    underlying download failed.
    NOTE(review): defaults outpath=os.getcwd() and buffersize=[...] are
    evaluated once at import time - matches the sibling variants."""
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Download to a temp file, then move it into the target location.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # Copy the temp file into an in-memory buffer and return its bytes.
        # (The py2/py3 branches differed only in StringIO vs BytesIO and are
        # merged here.)
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch *httpurl* with the requests library and return a result dict:
    {'Type': "Content", 'Content': bytes, 'Headers': dict, 'URL': str,
    'Code': int}, or False on connect timeout."""
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        # BUGFIX: copy before mutating so the shared default header dict
        # (geturls_headers) is not polluted across calls.
        httpheaders = dict(httpheaders)
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    # BUGFIX: compression is signalled by Content-Encoding, not Content-Type
    # (the sibling urllib variant already checked the correct header), and a
    # GzipFile is drained with .read() - it has no .content attribute.
    if(geturls_text.headers.get("Content-Encoding")=="gzip" or geturls_text.headers.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content)
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.headers.get("Content-Encoding")!="gzip" and geturls_text.headers.get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.content[:]
    if(geturls_text.headers.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when the requests library is unavailable: delegate
        to the plain-urllib implementation with the same arguments.

        BUGFIX: the wrapper now returns the delegated result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download *httpurl* with the requests library into a uniquely named
    temporary file; see download_from_url_file_with_request for the shape of
    the returned dict. Returns False on connect timeout.

    CHANGE: httpheaders/httpcookie now default to geturls_headers/geturls_cj,
    consistent with every sibling variant (backward compatible)."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash URL + buffer size + start time to make the temp-file name unique.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        # Copy before mutating so the shared default dict is not polluted.
        httpheaders = dict(httpheaders)
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUGFIX: Content-Length may be absent; int(None) raised TypeError and
    # made the following None-check unreachable.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when the requests library is unavailable: delegate
        to the plain-urllib implementation with the same arguments.

        BUGFIX: the wrapper now returns the delegated result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* (requests backend) and either move the result to
    outpath/outfile, or - when outfile == "-" - return the bytes in memory.

    Returns a result dict, or False when the destination is unusable or the
    underlying download failed."""
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        # Download to a temp file, then move it into the target location.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # Copy the temp file into an in-memory buffer and return its bytes.
        # (The py2/py3 branches differed only in StringIO vs BytesIO.)
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if(sys.version[0]=="2"):
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when the requests library is unavailable: delegate
        to the plain-urllib implementation with the same arguments.

        BUGFIX: the wrapper now returns the delegated result."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch *httpurl* with urllib3's PoolManager and return a result dict:
    {'Type': "Content", 'Content': bytes, 'Headers': dict, 'URL': str,
    'Code': int}, or False on connect timeout / max-retry errors.

    NOTE(review): httpcookie is accepted for interface parity with the other
    variants but is not applied by this urllib3 path in the visible code."""
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        # Copy before mutating so the shared default dict is not polluted.
        httpheaders = dict(httpheaders)
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # FIX: dropped the redundant double assignment
        # ("geturls_text = geturls_text = ...") present in the original.
        if(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    if(geturls_text.info().get("Content-Encoding")=="gzip" or geturls_text.info().get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.info().get("Content-Encoding")!="gzip" and geturls_text.info().get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:]
    if(geturls_text.info().get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when urllib3 is unavailable: delegate to the
        plain-urllib implementation with the same arguments.

        BUGFIX: the wrapper now returns the delegated result."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download *httpurl* with urllib3 into a uniquely named temporary file;
    see download_from_url_file_with_request for the returned dict's shape.
    Returns False on connect timeout / max-retry errors."""
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash URL + buffer size + start time to make the temp-file name unique.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        # Copy before mutating so the shared default dict is not polluted.
        httpheaders = dict(httpheaders)
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # FIX: dropped the redundant double assignment from the original.
        if(httpmethod == "POST"):
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUGFIX: Content-Length may be absent; int(None) raised TypeError and
    # made the following None-check unreachable.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when urllib3 is unavailable: delegate to the
        plain-urllib implementation with the same arguments.

        BUGFIX: the wrapper now returns the delegated result."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
1731 def download_from_url_to_file_with_request3(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
1732 global geturls_download_sleep
;
1734 sleep
= geturls_download_sleep
;
1735 if(not outfile
=="-"):
1736 outpath
= outpath
.rstrip(os
.path
.sep
);
1737 filepath
= os
.path
.realpath(outpath
+os
.path
.sep
+outfile
);
1738 if(not os
.path
.exists(outpath
)):
1739 os
.makedirs(outpath
);
1740 if(os
.path
.exists(outpath
) and os
.path
.isfile(outpath
)):
1742 if(os
.path
.exists(filepath
) and os
.path
.isdir(filepath
)):
1744 pretmpfilename
= download_from_url_file_with_request3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
1745 if(not pretmpfilename
):
1747 tmpfilename
= pretmpfilename
['Filename'];
1748 downloadsize
= os
.path
.getsize(tmpfilename
);
1750 log
.info("Moving file "+tmpfilename
+" to "+filepath
);
1751 exec_time_start
= time
.time();
1752 shutil
.move(tmpfilename
, filepath
);
1753 exec_time_end
= time
.time();
1754 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to move file.");
1755 if(os
.path
.exists(tmpfilename
)):
1756 os
.remove(tmpfilename
);
1757 returnval
= {'Type': "File", 'Filename': filepath
, 'Filesize': downloadsize
, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
1758 if(outfile
=="-" and sys
.version
[0]=="2"):
1759 pretmpfilename
= download_from_url_file_with_request3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
1760 if(not pretmpfilename
):
1762 tmpfilename
= pretmpfilename
['Filename'];
1763 downloadsize
= os
.path
.getsize(tmpfilename
);
1766 exec_time_start
= time
.time();
1767 with
open(tmpfilename
, 'rb') as ft
:
1770 databytes
= ft
.read(buffersize
[1]);
1771 if not databytes
: break;
1772 datasize
= len(databytes
);
1773 fulldatasize
= datasize
+ fulldatasize
;
1776 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1777 downloaddiff
= fulldatasize
- prevdownsize
;
1778 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1779 prevdownsize
= fulldatasize
;
1782 fdata
= f
.getvalue();
1785 os
.remove(tmpfilename
);
1786 exec_time_end
= time
.time();
1787 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1788 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
1789 if(outfile
=="-" and sys
.version
[0]>="3"):
1790 pretmpfilename
= download_from_url_file_with_request3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
[0], sleep
);
1791 tmpfilename
= pretmpfilename
['Filename'];
1792 downloadsize
= os
.path
.getsize(tmpfilename
);
1795 exec_time_start
= time
.time();
1796 with
open(tmpfilename
, 'rb') as ft
:
1799 databytes
= ft
.read(buffersize
[1]);
1800 if not databytes
: break;
1801 datasize
= len(databytes
);
1802 fulldatasize
= datasize
+ fulldatasize
;
1805 percentage
= str("{0:.2f}".format(float(float(fulldatasize
/ downloadsize
) * 100))).rstrip('0').rstrip('.')+"%";
1806 downloaddiff
= fulldatasize
- prevdownsize
;
1807 log
.info("Copying "+get_readable_size(fulldatasize
, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize
, 2, "SI")['ReadableWithSuffix']+" "+str(percentage
)+" / Copied "+get_readable_size(downloaddiff
, 2, "IEC")['ReadableWithSuffix']);
1808 prevdownsize
= fulldatasize
;
1811 fdata
= f
.getvalue();
1814 os
.remove(tmpfilename
);
1815 exec_time_end
= time
.time();
1816 log
.info("It took "+hms_string(exec_time_start
- exec_time_end
)+" to copy file.");
1817 returnval
= {'Type': "Content", 'Content': fdata
, 'Contentsize': downloadsize
, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize
, 2, "IEC"), 'SI': get_readable_size(downloadsize
, 2, "SI")}, 'DownloadTime': pretmpfilename
['DownloadTime'], 'DownloadTimeReadable': pretmpfilename
['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start
- exec_time_end
), 'MoveFileTimeReadable': hms_string(exec_time_start
- exec_time_end
), 'Headers': pretmpfilename
['Headers'], 'URL': pretmpfilename
['URL'], 'Code': pretmpfilename
['Code']};
if(not haveurllib3):
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when httplib2/urllib3 support is unavailable.

        Delegates to the plain-urllib implementation with the same arguments
        and returns its result dict (or False on failure).
        NOTE(review): the mutable list default for buffersize matches the
        sibling definitions; it is never mutated, so it is kept for interface
        compatibility.
        """
        # Bug fix: the garbled original dropped the return of the delegate's result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with urllib3 and return a result dict.

    Returns {'Type': "Content", 'Content': bytes, 'Headers': dict,
    'URL': final URL, 'Code': HTTP status} or False on connection errors.
    httpheaders may be a list (converted to a dict) or a dict; URL-embedded
    credentials are converted to an HTTP Basic Authorization header.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        # Embed URL credentials as an HTTP Basic Authorization header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            # Bug fix: the original issued urlopen("GET", ...) even for POST requests.
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    # NOTE(review): deflate is also routed through GzipFile, matching the
    # sibling implementations in this file; raw-deflate payloads would fail.
    if(geturls_text.info().get("Content-Encoding") == "gzip" or geturls_text.info().get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.info().get("Content-Encoding") != "gzip" and geturls_text.info().get("Content-Encoding") != "deflate"):
        returnval_content = geturls_text.read()[:]
    if(geturls_text.info().get("Content-Encoding") == "br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if(not haveurllib3):
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when urllib3 is not installed: delegate to plain urllib."""
        # Bug fix: restore the dropped return of the delegate's result.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with urllib3 into a uniquely named temporary file.

    Returns {'Type': "File", 'Filename': tmpfile path, 'Filesize': int,
    'FilesizeAlt': {...}, 'Headers': dict, 'URL': final URL, 'Code': status,
    'DownloadTime': float, 'DownloadTimeReadable': str} or False on error.
    The caller is responsible for moving/removing the temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
        elif(httpmethod == "POST"):
            # Bug fix: the original issued urlopen("GET", ...) even for POST.
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL "+httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # Bug fix: the original did int(headers.get('Content-Length')) BEFORE the
    # None check, so a missing Content-Length raised TypeError.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard avoids ZeroDivisionError when Content-Length is unknown.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.close()
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not haveurllib3):
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when urllib3 is not installed: delegate to plain urllib."""
        # Bug fix: restore the dropped return of the delegate's result.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with urllib3 either to outpath/outfile or, when
    outfile is "-", into memory.

    Returns a dict with Type "File" (moved to its final path) or "Content"
    (in-memory data), plus size/timing/header metadata, or False on error.
    buffersize[0] is the network read size, buffersize[1] the copy size.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file: refuse.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory: refuse.
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # Unified "-" handling; python 2 used StringIO, python 3 BytesIO.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # Bug fix: the original python-3 branch lacked this failure guard
        # (the python-2 branch had it) and would KeyError on False.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO() if sys.version[0] == "2" else BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haveurllib3):
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when urllib3 is not installed: delegate to plain urllib."""
        # Bug fix: restore the dropped return of the delegate's result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl through a mechanize.Browser and return a result dict.

    Returns {'Type': "Content", 'Content': bytes, 'Headers': dict,
    'URL': final URL, 'Code': HTTP status} or False on connection errors.
    An HTTPError response is kept and processed like a normal response so the
    caller still receives the error body and status code.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    # mechanize expects a list of (name, value) pairs, not a dict.
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # Keep the error response object; it still carries headers/body/code.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    if(geturls_text.info().get("Content-Encoding") == "gzip" or geturls_text.info().get("Content-Encoding") == "deflate"):
        if(sys.version[0] == "2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0] >= "3"):
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.info().get("Content-Encoding") != "gzip" and geturls_text.info().get("Content-Encoding") != "deflate"):
        returnval_content = geturls_text.read()[:]
    if(geturls_text.info().get("Content-Encoding") == "br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when mechanize is not installed: delegate to plain urllib."""
        # Bug fix: restore the dropped return of the delegate's result.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl through mechanize into a uniquely named temp file.

    Returns the same "File" result dict as the urllib3 variant (filename,
    sizes, headers, URL, code, timing) or False on connection errors.
    The caller is responsible for moving/removing the temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from URL, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0] == "2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0] >= "3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod == "GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod == "POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # Bug fix: int() was applied before the None check, so a missing
    # Content-Length header raised TypeError.
    downloadsize = geturls_text.info().get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Guard avoids ZeroDivisionError when Content-Length is unknown.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
        f.close()
    geturls_text.close()
    exec_time_end = time.time()
    log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_start - exec_time_end), 'DownloadTimeReadable': hms_string(exec_time_start - exec_time_end)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when mechanize is not installed: delegate to plain urllib."""
        # Bug fix: restore the dropped return of the delegate's result.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl through mechanize either to outpath/outfile or,
    when outfile is "-", into memory.

    Returns a dict with Type "File" (moved to its final path) or "Content"
    (in-memory data), plus size/timing/header metadata, or False on error.
    buffersize[0] is the network read size, buffersize[1] the copy size.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile == "-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # Destination directory path is actually a file: refuse.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # Destination file path is actually a directory: refuse.
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile == "-"):
        # Unified "-" handling; python 2 used StringIO, python 3 BytesIO.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # Bug fix: the original python-3 branch lacked this failure guard
        # (the python-2 branch had it) and would KeyError on False.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        with open(tmpfilename, 'rb') as ft:
            f = StringIO() if sys.version[0] == "2" else BytesIO()
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
            f.seek(0)
            fdata = f.getvalue()
            f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_start - exec_time_end)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_start - exec_time_end), 'MoveFileTimeReadable': hms_string(exec_time_start - exec_time_end), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback when mechanize is not installed: delegate to plain urllib."""
        # Bug fix: restore the dropped return of the delegate's result.
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        return returnval
def download_file_from_ftp_file(url):
    """Download the file named by an ftp:// or ftps:// URL.

    Returns a BytesIO positioned at offset 0 containing the file data,
    or False for unsupported schemes.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    # Default to the standard FTP port when the URL does not specify one.
    ftp_port = urlparts.port if urlparts.port is not None else 21
    ftp.connect(urlparts.hostname, ftp_port)
    # Bug fix: login with the computed fallback credentials instead of the raw
    # URL fields, which are None for credential-less URLs.
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme == "ftps"):
        # Switch the data channel to TLS as well.
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Download an ftp:// / ftps:// URL and return its contents as bytes,
    or False when the download fails."""
    ftpfile = download_file_from_ftp_file(url)
    # Bug fix: propagate failure instead of calling .read() on False.
    if(not ftpfile):
        return False
    return ftpfile.read()
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the readable file object ftpfile to an ftp:// or ftps:// URL.

    Returns the same file object (rewound is the caller's concern),
    or False for unsupported schemes.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme == "ftp"):
        ftp = FTP()
    elif(urlparts.scheme == "ftps"):
        ftp = FTP_TLS()
    else:
        return False
    # Default to the standard FTP port when the URL does not specify one.
    ftp_port = urlparts.port if urlparts.port is not None else 21
    ftp.connect(urlparts.hostname, ftp_port)
    # Bug fix: login with the computed fallback credentials instead of the raw
    # URL fields, which are None for credential-less URLs.
    ftp.login(ftp_username, ftp_password)
    if(urlparts.scheme == "ftps"):
        # Switch the data channel to TLS as well.
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload a bytes payload to an ftp:// / ftps:// URL.

    Wraps the payload in a BytesIO and delegates to upload_file_to_ftp_file;
    returns that function's result (file object, or False on failure).
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    # Bug fix: restore the dropped return of the upload result.
    return ftpfile
def download_file_from_sftp_file(url):
    """Download the file named by an sftp:// URL via paramiko.

    Returns a BytesIO positioned at offset 0 containing the file data, or
    False for non-sftp schemes or SSH connection failures.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.port is None):
        # Default SSH port when the URL does not specify one.
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; acceptable for a downloader utility but worth flagging.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # Bug fix: use the computed fallback credentials rather than the raw
        # URL fields, which are None for credential-less URLs.
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_sftp_file(url):
    """Fallback stub used when paramiko is unavailable: sftp unsupported."""
    # Bug fix: the garbled original dropped the stub body.
    return False
def download_file_from_sftp_string(url):
    """Download an sftp:// URL and return its contents as bytes,
    or False when the download fails."""
    sftpfile = download_file_from_sftp_file(url)
    # Bug fix: propagate failure instead of calling .read() on False.
    if(not sftpfile):
        return False
    return sftpfile.read()
def download_file_from_ftp_string(url):
    """Fallback stub used when paramiko is unavailable: sftp unsupported.

    NOTE(review): the name says ftp but this sits in the paramiko-fallback
    group of stubs; presumably it should be download_file_from_sftp_string —
    kept as-is so callers of the existing name keep working.
    """
    # Bug fix: the garbled original dropped the stub body.
    return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the readable file object sftpfile to an sftp:// URL via paramiko.

    Returns the same file object rewound to offset 0, or False for non-sftp
    schemes or SSH connection failures.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.port is None):
        # Default SSH port when the URL does not specify one.
        sftp_port = 22
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username == "anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme != "sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without
    # verification; acceptable for a downloader utility but worth flagging.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # Bug fix: use the computed fallback credentials rather than the raw
        # URL fields, which are None for credential-less URLs.
        ssh.connect(urlparts.hostname, port=sftp_port, username=sftp_username, password=sftp_password)
    except paramiko.ssh_exception.SSHException:
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Fallback stub used when paramiko is unavailable: sftp unsupported."""
    # Bug fix: the garbled original dropped the stub body.
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload a bytes payload to an sftp:// URL.

    Wraps the payload in a BytesIO and delegates to upload_file_to_sftp_file;
    returns that function's result (file object, or False on failure).
    """
    sftpfileo = BytesIO(sftpstring)
    # Bug fixes: the original called the nonexistent upload_file_to_sftp_files
    # and passed the undefined name ftpfileo instead of the BytesIO it created;
    # it also dropped the return of the result.
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    return sftpfile
2441 def upload_file_to_sftp_string(url
):