4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016-2023 Game Maker 2k - https://github.com/GameMaker2k
13 Copyright 2016-2023 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
15 $FileInfo: pywwwget.py - Last Update: 9/15/2023 Ver. 0.9.4 RC 1 - Author: cooldude2k $
18 from __future__
import division
, absolute_import
, print_function
;
19 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, socket
;
20 import logging
as log
;
21 from ftplib
import FTP
, FTP_TLS
;
22 from base64
import b64encode
;
29 havemechanize
= False;
34 havemechanize
= False;
49 from httplib2
import HTTPConnectionWithTimeout
, HTTPSConnectionWithTimeout
;
65 if(sys
.version
[0]=="2"):
67 from cStringIO
import StringIO
;
69 from StringIO
import StringIO
;
70 # From http://python-future.org/compatible_idioms.html
71 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
72 from urllib
import urlencode
;
73 from urllib
import urlopen
as urlopenalt
;
74 from urllib2
import urlopen
, Request
, install_opener
, HTTPError
, URLError
, build_opener
, HTTPCookieProcessor
;
75 import urlparse
, cookielib
;
76 from httplib
import HTTPConnection
, HTTPSConnection
;
77 if(sys
.version
[0]>="3"):
78 from io
import StringIO
, BytesIO
;
79 # From http://python-future.org/compatible_idioms.html
80 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
81 from urllib
.request
import urlopen
, Request
, install_opener
, build_opener
, HTTPCookieProcessor
;
82 from urllib
.error
import HTTPError
, URLError
;
83 import urllib
.parse
as urlparse
;
84 import http
.cookiejar
as cookielib
;
85 from http
.client
import HTTPConnection
, HTTPSConnection
;
# Program identity / version metadata for PyWWW-Get.
87 __program_name__
= "PyWWW-Get";
88 __program_alt_name__
= "PyWWWGet";
89 __program_small_name__
= "wwwget";
90 __project__
= __program_name__
;
91 __project_url__
= "https://github.com/GameMaker2k/PyWWW-Get";
# Version tuples: (major, minor, patch, pre-release tag, pre-release number).
92 __version_info__
= (0, 9, 4, "RC 1", 1);
93 __version_date_info__
= (2023, 9, 15, "RC 1", 1);
# "YYYY.MM.DD" date string derived from __version_date_info__.
94 __version_date__
= str(__version_date_info__
[0])+"."+str(__version_date_info__
[1]).zfill(2)+"."+str(__version_date_info__
[2]).zfill(2);
95 __revision__
= __version_info__
[3];
96 __revision_id__
= "$Id$";
# Append "-<pre-release number>" to the date string only when one is set;
# the two ifs below are mutually exclusive.
97 if(__version_info__
[4] is not None):
98 __version_date_plusrc__
= __version_date__
+"-"+str(__version_date_info__
[4]);
99 if(__version_info__
[4] is None):
100 __version_date_plusrc__
= __version_date__
;
# Human-readable version string, with the pre-release tag appended when set.
101 if(__version_info__
[3] is not None):
102 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2])+" "+str(__version_info__
[3]);
103 if(__version_info__
[3] is None):
104 __version__
= str(__version_info__
[0])+"."+str(__version_info__
[1])+"."+str(__version_info__
[2]);
# Prefix for temporary download files, e.g. "py3wwwget0-".
106 tmpfileprefix
= "py"+str(sys
.version_info
[0])+__program_small_name__
+str(__version_info__
[0])+"-";
108 pytempdir
= tempfile
.gettempdir();
# NOTE(review): compression_supported is assigned three times; the guarding
# conditionals (original lines 109/111/113 — presumably a brotli-availability
# check) are not visible in this view, so the text is kept verbatim.
110 compression_supported
= "gzip, deflate";
112 compression_supported
= "gzip, deflate, br";
114 compression_supported
= "gzip, deflate";
# Shared cookie jar used as the default cookie store for all download helpers.
geturls_cj = cookielib.CookieJar();
# Windows platform fragments for fake User-Agent strings, plus the matching
# Client Hints request headers for each Windows release.
# FIX: each addon dict previously listed 'SEC-CH-UA-PLATFORM' twice, so the
# second literal silently overwrote "Windows" with the version number.  The
# second entry is the platform *version* hint and is now correctly named
# 'SEC-CH-UA-PLATFORM-VERSION'.
windowsNT4_ua_string = "Windows NT 4.0";
windowsNT4_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "4.0.0"};
windows2k_ua_string = "Windows NT 5.0";
windows2k_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "5.0.0"};
windowsXP_ua_string = "Windows NT 5.1";
windowsXP_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "5.1.0"};
windowsXP64_ua_string = "Windows NT 5.2; Win64; x64";
# FIX: XP x64 is NT 5.2; the original copy-pasted "5.1.0" here.
windowsXP64_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "5.2.0"};
windows7_ua_string = "Windows NT 6.1; Win64; x64";
windows7_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "6.1.0"};
windows8_ua_string = "Windows NT 6.2; Win64; x64";
windows8_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "6.2.0"};
windows81_ua_string = "Windows NT 6.3; Win64; x64";
windows81_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "6.3.0"};
windows10_ua_string = "Windows NT 10.0; Win64; x64";
windows10_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "10.0.0"};
windows11_ua_string = "Windows NT 11.0; Win64; x64";
windows11_ua_addon = {'SEC-CH-UA-PLATFORM': "Windows", 'SEC-CH-UA-ARCH': "x86", 'SEC-CH-UA-PLATFORM-VERSION': "11.0.0"};
# Fake browser User-Agent strings; every browser UA claims the Windows 7 x64
# platform fragment defined above.
geturls_ua_firefox_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, "; rv:109.0) Gecko/20100101 Firefox/117.0"]);
geturls_ua_seamonkey_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, "; rv:91.0) Gecko/20100101 Firefox/91.0 SeaMonkey/2.53.17"]);
geturls_ua_chrome_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"]);
geturls_ua_chromium_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, ") AppleWebKit/537.36 (KHTML, like Gecko) Chromium/117.0.0.0 Chrome/117.0.0.0 Safari/537.36"]);
geturls_ua_palemoon_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, "; rv:102.0) Gecko/20100101 Goanna/6.3 Firefox/102.0 PaleMoon/32.4.0.1"]);
geturls_ua_opera_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 OPR/102.0.0.0"]);
geturls_ua_vivaldi_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36 Vivaldi/6.2.3105.48"]);
geturls_ua_internet_explorer_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, "; Trident/7.0; rv:11.0) like Gecko"]);
geturls_ua_microsoft_edge_windows7 = "".join(["Mozilla/5.0 (", windows7_ua_string, ") AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.31"]);
# Honest self-identifying UA for this tool.
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__);
# Interpreter name ("CPython", "PyPy", ...); fall back to plain "Python" when
# the platform module reports an empty implementation string.
if(platform.python_implementation()!=""):
    py_implementation = platform.python_implementation();
else:
    py_implementation = "Python";
# Alternative self-identifying UA carrying OS, architecture and interpreter
# details.
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=py_implementation, pyver=platform.python_version(), proname=__project__, prover=__version__);
# Googlebot UA strings (current and legacy formats).
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)";
# Default UA used by the default header set.
geturls_ua = geturls_ua_firefox_windows7;
# Complete request-header templates for each emulated browser.  The Chromium
# family additionally merges in the Windows 7 Client Hints (windows7_ua_addon).
# FIX: the two pywwwget header dicts each listed 'SEC-CH-UA-PLATFORM' twice,
# so the interpreter-name value was silently overwritten by the version
# string; the second entry is now 'SEC-CH-UA-PLATFORM-VERSION'.  Also added
# the statement-terminating semicolon missing from the Edge dict.
geturls_headers_firefox_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_firefox_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_seamonkey_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_seamonkey_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_chrome_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chrome_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chrome_windows7.update(windows7_ua_addon);
geturls_headers_chromium_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_chromium_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"117\", \"Not;A=Brand\";v=\"24\"", 'SEC-CH-UA-FULL-VERSION': "117.0.5938.63"};
geturls_headers_chromium_windows7.update(windows7_ua_addon);
geturls_headers_palemoon_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_palemoon_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_opera_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_opera_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Chromium\";v=\"116\", \"Not;A=Brand\";v=\"8\", \"Opera\";v=\"102\"", 'SEC-CH-UA-FULL-VERSION': "102.0.4880.56"};
geturls_headers_opera_windows7.update(windows7_ua_addon);
geturls_headers_vivaldi_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_vivaldi_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Google Chrome\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Vivaldi\";v=\"6.2\"", 'SEC-CH-UA-FULL-VERSION': "6.2.3105.48"};
geturls_headers_vivaldi_windows7.update(windows7_ua_addon);
geturls_headers_internet_explorer_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_internet_explorer_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_microsoft_edge_windows7 = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_microsoft_edge_windows7, 'Accept-Encoding': compression_supported, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\"Microsoft Edge\";v=\"117\", \"Not;A=Brand\";v=\"8\", \"Chromium\";v=\"117\"", 'SEC-CH-UA-FULL-VERSION': "117.0.2045.31"};
geturls_headers_microsoft_edge_windows7.update(windows7_ua_addon);
geturls_headers_pywwwget_python = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__)};
geturls_headers_pywwwget_python_alt = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_pywwwget_python_alt, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close", 'SEC-CH-UA': "\""+__project__+"\";v=\""+str(__version__)+"\", \"Not;A=Brand\";v=\"8\", \""+py_implementation+"\";v=\""+str(platform.release())+"\"", 'SEC-CH-UA-FULL-VERSION': str(__version__), 'SEC-CH-UA-PLATFORM': ""+py_implementation+"", 'SEC-CH-UA-ARCH': ""+platform.machine()+"", 'SEC-CH-UA-PLATFORM-VERSION': str(__version__)};
geturls_headers_googlebot_google = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
geturls_headers_googlebot_google_old = {'Referer': "http://google.com/", 'User-Agent': geturls_ua_googlebot_google_old, 'Accept-Encoding': "none", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"};
# Default header set and default inter-download sleep (seconds).
geturls_headers = geturls_headers_firefox_windows7;
geturls_download_sleep = 0;
# Emit a debug/status message through the channel named by `outtype`
# ("print", "log", "warning", "error", "critical", "exception", "logalt",
# "debug"); `dbgenable` gates all output and `dgblevel` is the numeric level
# for the "logalt" branch.
# NOTE(review): original lines 176-177 (the "print" branch body) and several
# trailing lines are not visible in this view; text kept verbatim.
174 def verbose_printout(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
175 if(outtype
=="print" and dbgenable
):
178 elif(outtype
=="log" and dbgenable
):
179 logging
.info(dbgtxt
);
181 elif(outtype
=="warning" and dbgenable
):
182 logging
.warning(dbgtxt
);
184 elif(outtype
=="error" and dbgenable
):
185 logging
.error(dbgtxt
);
187 elif(outtype
=="critical" and dbgenable
):
188 logging
.critical(dbgtxt
);
190 elif(outtype
=="exception" and dbgenable
):
191 logging
.exception(dbgtxt
);
193 elif(outtype
=="logalt" and dbgenable
):
194 logging
.log(dgblevel
, dbgtxt
);
196 elif(outtype
=="debug" and dbgenable
):
197 logging
.debug(dbgtxt
);
# Same as verbose_printout, but (per its name) also returns a value; the
# return statement (original lines 207-208) is not visible in this view.
205 def verbose_printout_return(dbgtxt
, outtype
="log", dbgenable
=True, dgblevel
=20):
206 verbose_printout(dbgtxt
, outtype
, dbgenable
, dgblevel
);
# Merge the given keyword arguments into the query string of `url` and return
# the rebuilt URL.  `n` indexes the query component of the split result.
# NOTE(review): original lines 210 and 213 (the definition of `n` and the
# params merge) are not visible in this view; text kept verbatim.
209 def add_url_param(url
, **params
):
211 parts
= list(urlparse
.urlsplit(url
));
212 d
= dict(cgi
.parse_qsl(parts
[n
])); # use cgi.parse_qs for list values
214 parts
[n
]=urlencode(d
);
215 return urlparse
.urlunsplit(parts
);
# Extend the executable search path: append the directory containing this
# script and the current working directory to the existing PATH, separated by
# the platform's path separator.
os.environ["PATH"] = os.pathsep.join(
    [os.environ["PATH"],
     os.path.dirname(os.path.realpath(__file__)),
     os.getcwd()]);
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None when no entry on the search path contains the file.
    FIX: the original split PATH on a hard-coded ":" and joined with "/",
    which is wrong on Windows; os.pathsep / os.path.join are portable and
    behave identically on POSIX.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile);
        if os.path.exists(candidate):
            return candidate;
    # Not found anywhere on PATH (the original fell off the end implicitly).
    return None;
# Build forward (index -> value) and reverse (value -> index) lookup dicts
# from `varlist`, packaged together in `newlistfull` under both numeric and
# string keys.
# NOTE(review): original lines 224-230 (initialization and loop header) and
# 233-237 (loop increment / return) are not visible in this view; text kept
# verbatim.
223 def listize(varlist
):
231 newlistreg
.update({ilx
: varlist
[il
]});
232 newlistrev
.update({varlist
[il
]: ilx
});
235 newlistfull
= {1: newlistreg
, 2: newlistrev
, 'reg': newlistreg
, 'rev': newlistrev
};
# Like listize, but for a list of (name, desc) pairs: builds forward and
# reverse lookup dicts for both the name column and the description column,
# stripping whitespace from each value.
# NOTE(review): original lines 239-247 and 252-253, 257-258 (initialization,
# loop control and return) are not visible in this view; text kept verbatim.
238 def twolistize(varlist
):
248 newlistnamereg
.update({ilx
: varlist
[il
][0].strip()});
249 newlistnamerev
.update({varlist
[il
][0].strip(): ilx
});
250 newlistdescreg
.update({ilx
: varlist
[il
][1].strip()});
251 newlistdescrev
.update({varlist
[il
][1].strip(): ilx
});
254 newlistnametmp
= {1: newlistnamereg
, 2: newlistnamerev
, 'reg': newlistnamereg
, 'rev': newlistnamerev
};
255 newlistdesctmp
= {1: newlistdescreg
, 2: newlistdescrev
, 'reg': newlistdescreg
, 'rev': newlistdescrev
};
256 newlistfull
= {1: newlistnametmp
, 2: newlistdesctmp
, 'name': newlistnametmp
, 'desc': newlistdesctmp
}
# Flatten (option, value) pairs from `varlist` into a subprocess-style
# argument list headed by the executable `proexec`, skipping None entries.
# NOTE(review): original lines 260-262, 264 and 269-271 (loop control and
# return) are not visible in this view; text kept verbatim.
259 def arglistize(proexec
, *varlist
):
263 newarglist
= [proexec
];
265 if varlist
[il
][0] is not None:
266 newarglist
.append(varlist
[il
][0]);
267 if varlist
[il
][1] is not None:
268 newarglist
.append(varlist
[il
][1]);
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Render an elapsed-seconds count as "H:MM:SS.ss"."""
    whole_hours = int(sec_elapsed / 3600)
    whole_minutes = int(sec_elapsed % 3600 / 60)
    leftover_seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(whole_hours, whole_minutes, leftover_seconds)
# Convert a byte count into a human-readable size dict with keys 'Bytes',
# 'ReadableWithSuffix', 'ReadableWithoutSuffix' and 'ReadableSuffix'.
# `unit` selects IEC (KiB/MiB, base 1024) or SI (kB/MB, base 1000) suffixes;
# the regex substitutions trim padding between the number and the suffix.
# NOTE(review): original lines 283, 285-286, 289-296, 304-305 and 312-313
# (orgbytes/unitsize setup, the unit-selection branches, the per-unit loop
# and the return) are not visible in this view; text kept verbatim.
280 # get_readable_size by Lipis
281 # http://stackoverflow.com/posts/14998888/revisions
282 def get_readable_size(bytes
, precision
=1, unit
="IEC"):
284 if(unit
!="IEC" and unit
!="SI"):
287 units
= [" B"," KiB"," MiB"," GiB"," TiB"," PiB"," EiB"," ZiB"];
288 unitswos
= ["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB"];
291 units
= [" B"," kB"," MB"," GB"," TB"," PB"," EB"," ZB"];
292 unitswos
= ["B","kB","MB","GB","TB","PB","EB","ZB"];
297 if abs(bytes
) < unitsize
:
298 strformat
= "%3."+str(precision
)+"f%s";
299 pre_return_val
= (strformat
% (bytes
, unit
));
300 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
301 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
302 alt_return_val
= pre_return_val
.split();
303 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
# Fallback branch: value exceeded the largest listed unit, format as YiB.
306 strformat
= "%."+str(precision
)+"f%s";
307 pre_return_val
= (strformat
% (bytes
, "YiB"));
308 pre_return_val
= re
.sub(r
"([0]+) ([A-Za-z]+)", r
" \2", pre_return_val
);
309 pre_return_val
= re
.sub(r
"\. ([A-Za-z]+)", r
" \1", pre_return_val
);
310 alt_return_val
= pre_return_val
.split();
311 return_val
= {'Bytes': orgbytes
, 'ReadableWithSuffix': pre_return_val
, 'ReadableWithoutSuffix': alt_return_val
[0], 'ReadableSuffix': alt_return_val
[1]}
# Return get_readable_size() info for a file on disk; when `usehashes` is
# true, also compute each comma-separated digest in `usehashtypes` over the
# whole file contents and add it to the result dict (keyed by the upper-cased
# hash name).
# NOTE(review): original lines 315, 319, 323-324 and 333-335 (the usehashes
# guard, loop counter init/increment, file close and return) are not visible
# in this view; text kept verbatim.
314 def get_readable_size_from_file(infile
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
316 usehashtypes
= usehashtypes
.lower();
317 getfilesize
= os
.path
.getsize(infile
);
318 return_val
= get_readable_size(getfilesize
, precision
, unit
);
320 hashtypelist
= usehashtypes
.split(",");
# NOTE(review): file handle below is read whole into memory; no visible close.
321 openfile
= open(infile
, "rb");
322 filecontents
= openfile
.read();
325 listnumend
= len(hashtypelist
);
326 while(listnumcount
< listnumend
):
327 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
328 hashtypelistup
= hashtypelistlow
.upper();
329 filehash
= hashlib
.new(hashtypelistup
);
330 filehash
.update(filecontents
);
331 filegethash
= filehash
.hexdigest();
332 return_val
.update({hashtypelistup
: filegethash
});
# String variant of get_readable_size_from_file: size of `instring` plus
# optional digests over its contents.  On Python 3 the string is encoded to
# UTF-8 before hashing.
# NOTE(review): original lines 337, 341, 343 and 355-357 (the usehashes
# guard, loop counter init/increment and return) are not visible in this
# view; text kept verbatim.
336 def get_readable_size_from_string(instring
, precision
=1, unit
="IEC", usehashes
=False, usehashtypes
="md5,sha1"):
338 usehashtypes
= usehashtypes
.lower();
339 getfilesize
= len(instring
);
340 return_val
= get_readable_size(getfilesize
, precision
, unit
);
342 hashtypelist
= usehashtypes
.split(",");
344 listnumend
= len(hashtypelist
);
345 while(listnumcount
< listnumend
):
346 hashtypelistlow
= hashtypelist
[listnumcount
].strip();
347 hashtypelistup
= hashtypelistlow
.upper();
348 filehash
= hashlib
.new(hashtypelistup
);
349 if(sys
.version
[0]=="2"):
350 filehash
.update(instring
);
351 if(sys
.version
[0]>="3"):
352 filehash
.update(instring
.encode('utf-8'));
353 filegethash
= filehash
.hexdigest();
354 return_val
.update({hashtypelistup
: filegethash
});
# Convert a headers dict into a list of (key, value) tuples (the form
# urllib openers accept); a list input is handled by the elif branch.
# NOTE(review): original lines 360 and 368-372 (returnval initialization,
# the list branch body and the return) are not visible in this view; text
# kept verbatim.
358 def make_http_headers_from_dict_to_list(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
359 if isinstance(headers
, dict):
361 if(sys
.version
[0]=="2"):
362 for headkey
, headvalue
in headers
.iteritems():
363 returnval
.append((headkey
, headvalue
));
364 if(sys
.version
[0]>="3"):
365 for headkey
, headvalue
in headers
.items():
366 returnval
.append((headkey
, headvalue
));
367 elif isinstance(headers
, list):
# Convert a headers dict into a list of "Key: value" strings (the form
# pycurl's HTTPHEADER option accepts); a list input is handled by the elif
# branch.
# NOTE(review): original lines 375 and 383-387 (returnval initialization,
# the list branch body and the return) are not visible in this view; text
# kept verbatim.
373 def make_http_headers_from_dict_to_pycurl(headers
={'Referer': "http://google.com/", 'User-Agent': geturls_ua
, 'Accept-Encoding': compression_supported
, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
374 if isinstance(headers
, dict):
376 if(sys
.version
[0]=="2"):
377 for headkey
, headvalue
in headers
.iteritems():
378 returnval
.append(headkey
+": "+headvalue
);
379 if(sys
.version
[0]>="3"):
380 for headkey
, headvalue
in headers
.items():
381 returnval
.append(headkey
+": "+headvalue
);
382 elif isinstance(headers
, list):
# Inverse of make_http_headers_from_dict_to_list: fold a list of (key, value)
# tuples back into a dict; a dict input is handled by the elif branch.
# NOTE(review): original lines 390-393, 395 and 397-401 (initialization,
# loop control, the dict branch body and the return) are not visible in this
# view; text kept verbatim.
388 def make_http_headers_from_list_to_dict(headers
=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua
), ("Accept-Encoding", compression_supported
), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
389 if isinstance(headers
, list):
394 returnval
.update({headers
[mli
][0]: headers
[mli
][1]});
396 elif isinstance(headers
, dict):
# Build the list of usable HTTP backend names based on the module-level
# have* availability flags; with a non-None `checkvalue`, normalize legacy
# aliases ("urllib1"/"urllib2" -> "urllib", "httplib1" -> "httplib") and test
# membership.
# NOTE(review): original lines 404, 406, 409, 413, 415, 417 and 425-429
# (returnval initialization, the have* guards and the returns) are not
# visible in this view; text kept verbatim.
402 def get_httplib_support(checkvalue
=None):
403 global haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
;
405 returnval
.append("httplib");
407 returnval
.append("httplib2");
408 returnval
.append("urllib");
410 returnval
.append("urllib3");
411 returnval
.append("request3");
412 returnval
.append("request");
414 returnval
.append("requests");
416 returnval
.append("httpx");
418 returnval
.append("mechanize");
419 if(not checkvalue
is None):
420 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
421 checkvalue
= "urllib";
422 if(checkvalue
=="httplib1"):
423 checkvalue
= "httplib";
424 if(checkvalue
in returnval
):
# Normalize legacy backend aliases, then ask get_httplib_support whether the
# backend is available.
# NOTE(review): original lines 436-437 (the return) are not visible in this
# view; text kept verbatim.
430 def check_httplib_support(checkvalue
="urllib"):
431 if(checkvalue
=="urllib1" or checkvalue
=="urllib2"):
432 checkvalue
= "urllib";
433 if(checkvalue
=="httplib1"):
434 checkvalue
= "httplib";
435 returnval
= get_httplib_support(checkvalue
);
# Convenience wrapper: the full list of available backends (no membership
# check).  The return (original lines 440-441) is not visible in this view.
438 def get_httplib_support_list():
439 returnval
= get_httplib_support(None);
# Download `httpurl` into memory via the backend named by `httplibuse`,
# normalizing legacy aliases and falling back to urllib/httplib when the
# requested backend's have* flag is False.  A negative `sleep` means "use the
# module default geturls_download_sleep".
# NOTE(review): original lines 444 (the sleep guard) and 476-479 (the
# fallthrough branch and return) are not visible in this view; text kept
# verbatim.
442 def download_from_url(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", sleep
=-1):
443 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
;
445 sleep
= geturls_download_sleep
;
# Normalize legacy backend aliases.
446 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
447 httplibuse
= "urllib";
448 if(httplibuse
=="httplib1"):
449 httplibuse
= "httplib";
# Fall back when the requested optional backend is not installed.
450 if(not haverequests
and httplibuse
=="requests"):
451 httplibuse
= "urllib";
452 if(not havehttpx
and httplibuse
=="httpx"):
453 httplibuse
= "urllib";
454 if(not havemechanize
and httplibuse
=="mechanize"):
455 httplibuse
= "urllib";
456 if(not havehttplib2
and httplibuse
=="httplib2"):
457 httplibuse
= "httplib";
# Dispatch to the per-backend implementation.
458 if(httplibuse
=="urllib"):
459 returnval
= download_from_url_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
460 elif(httplibuse
=="request"):
461 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
462 elif(httplibuse
=="request3"):
463 returnval
= download_from_url_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
464 elif(httplibuse
=="httplib"):
465 returnval
= download_from_url_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
466 elif(httplibuse
=="httplib2"):
467 returnval
= download_from_url_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
468 elif(httplibuse
=="urllib3"):
469 returnval
= download_from_url_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
470 elif(httplibuse
=="requests"):
471 returnval
= download_from_url_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
472 elif(httplibuse
=="httpx"):
473 returnval
= download_from_url_with_httpx(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
474 elif(httplibuse
=="mechanize"):
475 returnval
= download_from_url_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, sleep
);
# Same backend dispatch as download_from_url, but streams the download to a
# temporary file using `buffersize`-byte reads (see the *_file_with_*
# implementations).
# NOTE(review): original lines 482 (the sleep guard) and 514-517 (the
# fallthrough branch and return) are not visible in this view; text kept
# verbatim.
480 def download_from_url_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", buffersize
=524288, sleep
=-1):
481 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
;
483 sleep
= geturls_download_sleep
;
# Normalize legacy backend aliases, then apply availability fallbacks.
484 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
485 httplibuse
= "urllib";
486 if(httplibuse
=="httplib1"):
487 httplibuse
= "httplib";
488 if(not haverequests
and httplibuse
=="requests"):
489 httplibuse
= "urllib";
490 if(not havehttpx
and httplibuse
=="httpx"):
491 httplibuse
= "urllib";
492 if(not havemechanize
and httplibuse
=="mechanize"):
493 httplibuse
= "urllib";
494 if(not havehttplib2
and httplibuse
=="httplib2"):
495 httplibuse
= "httplib";
# Dispatch to the per-backend file-download implementation.
496 if(httplibuse
=="urllib"):
497 returnval
= download_from_url_file_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
498 elif(httplibuse
=="request"):
499 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
500 elif(httplibuse
=="request3"):
501 returnval
= download_from_url_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
502 elif(httplibuse
=="httplib"):
503 returnval
= download_from_url_file_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
504 elif(httplibuse
=="httplib2"):
505 returnval
= download_from_url_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
506 elif(httplibuse
=="urllib3"):
507 returnval
= download_from_url_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
508 elif(httplibuse
=="requests"):
509 returnval
= download_from_url_file_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
510 elif(httplibuse
=="httpx"):
511 returnval
= download_from_url_file_with_httpx(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
512 elif(httplibuse
=="mechanize"):
513 returnval
= download_from_url_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, buffersize
, sleep
);
# Same backend dispatch again, but writes the download to `outfile` inside
# `outpath` ("-" presumably meaning stdout — TODO confirm against the
# *_to_file_with_* implementations).  `buffersize` is a [read, write] pair
# here.
# NOTE(review): original lines 520 (the sleep guard) and 552-555 (the
# fallthrough branch and return) are not visible in this view; text kept
# verbatim.
518 def download_from_url_to_file(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, httplibuse
="urllib", outfile
="-", outpath
=os
.getcwd(), buffersize
=[524288, 524288], sleep
=-1):
519 global geturls_download_sleep
, haverequests
, havemechanize
, havehttplib2
, haveurllib3
, havehttpx
;
521 sleep
= geturls_download_sleep
;
# Normalize legacy backend aliases, then apply availability fallbacks.
522 if(httplibuse
=="urllib1" or httplibuse
=="urllib2"):
523 httplibuse
= "urllib";
524 if(httplibuse
=="httplib1"):
525 httplibuse
= "httplib";
526 if(not haverequests
and httplibuse
=="requests"):
527 httplibuse
= "urllib";
528 if(not havehttpx
and httplibuse
=="httpx"):
529 httplibuse
= "urllib";
530 if(not havemechanize
and httplibuse
=="mechanize"):
531 httplibuse
= "urllib";
532 if(not havehttplib2
and httplibuse
=="httplib2"):
533 httplibuse
= "httplib";
# Dispatch to the per-backend to-file implementation.
534 if(httplibuse
=="urllib"):
535 returnval
= download_from_url_to_file_with_urllib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
536 elif(httplibuse
=="request"):
537 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
538 elif(httplibuse
=="request3"):
539 returnval
= download_from_url_to_file_with_request(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
540 elif(httplibuse
=="httplib"):
541 returnval
= download_from_url_to_file_with_httplib(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
542 elif(httplibuse
=="httplib2"):
543 returnval
= download_from_url_to_file_with_httplib2(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
544 elif(httplibuse
=="urllib3"):
545 returnval
= download_from_url_to_file_with_urllib3(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
546 elif(httplibuse
=="requests"):
547 returnval
= download_from_url_to_file_with_requests(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
548 elif(httplibuse
=="httpx"):
549 returnval
= download_from_url_to_file_with_httpx(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
550 elif(httplibuse
=="mechanize"):
551 returnval
= download_from_url_to_file_with_mechanize(httpurl
, httpheaders
, httpcookie
, httpmethod
, postdata
, outfile
, outpath
, buffersize
, sleep
);
# urllib backend: fetch `httpurl` into memory through a cookie-aware opener,
# sending Basic auth when the URL carries userinfo, transparently inflating
# gzip/deflate responses, and returning a dict with 'Type', 'Content',
# 'Headers', 'HeadersSent', 'URL' and 'Code'.
# NOTE(review): original lines 558, 570, 573, 578, 583, 585, 588 and 603-604
# (the sleep guard, try headers, error returns and final return) are not
# visible in this view; text kept verbatim.
556 def download_from_url_with_urllib(httpurl
, httpheaders
=geturls_headers
, httpcookie
=geturls_cj
, httpmethod
="GET", postdata
=None, sleep
=-1):
557 global geturls_download_sleep
, havebrotli
;
559 sleep
= geturls_download_sleep
;
560 urlparts
= urlparse
.urlparse(httpurl
);
# Normalize header shape to a dict for the auth update below.
561 if(isinstance(httpheaders
, list)):
562 httpheaders
= make_http_headers_from_list_to_dict(httpheaders
);
# URL-embedded credentials become a Basic Authorization header.
563 if(urlparts
.username
is not None or urlparts
.password
is not None):
564 inurlencode
= b64encode(str(urlparts
.username
+":"+urlparts
.password
).encode()).decode("UTF-8");
565 httpheaders
.update( { 'Authorization': "Basic "+inurlencode
} );
566 geturls_opener
= build_opener(HTTPCookieProcessor(httpcookie
));
# The opener wants headers back as a list of tuples.
567 if(isinstance(httpheaders
, dict)):
568 httpheaders
= make_http_headers_from_dict_to_list(httpheaders
);
569 geturls_opener
.addheaders
= httpheaders
;
571 if(postdata
is not None and not isinstance(postdata
, dict)):
572 postdata
= urlencode(postdata
);
574 if(httpmethod
=="GET"):
575 geturls_text
= geturls_opener
.open(httpurl
);
576 elif(httpmethod
=="POST"):
577 geturls_text
= geturls_opener
.open(httpurl
, data
=postdata
);
579 geturls_text
= geturls_opener
.open(httpurl
);
# HTTP error responses are still read as the result body.
580 except HTTPError
as geturls_text_error
:
581 geturls_text
= geturls_text_error
;
582 log
.info("Error With URL "+httpurl
);
584 log
.info("Error With URL "+httpurl
);
586 except socket
.timeout
:
587 log
.info("Error With URL "+httpurl
);
589 log
.info("Downloading URL "+httpurl
);
# Inflate gzip/deflate bodies (StringIO on py2, BytesIO on py3).
590 if(geturls_text
.info().get("Content-Encoding")=="gzip" or geturls_text
.info().get("Content-Encoding")=="deflate"):
591 if(sys
.version
[0]=="2"):
592 strbuf
= StringIO(geturls_text
.read());
593 if(sys
.version
[0]>="3"):
594 strbuf
= BytesIO(geturls_text
.read());
595 gzstrbuf
= gzip
.GzipFile(fileobj
=strbuf
);
596 returnval_content
= gzstrbuf
.read()[:];
597 if(geturls_text
.info().get("Content-Encoding")!="gzip" and geturls_text
.info().get("Content-Encoding")!="deflate"):
598 returnval_content
= geturls_text
.read()[:];
# Brotli bodies are decompressed only when the brotli module is available.
599 if(geturls_text
.info().get("Content-Encoding")=="br" and havebrotli
):
600 returnval_content
= brotli
.decompress(returnval_content
);
601 returnval
= {'Type': "Content", 'Content': returnval_content
, 'Headers': dict(geturls_text
.info()), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders
), 'URL': geturls_text
.geturl(), 'Code': geturls_text
.getcode()};
602 geturls_text
.close();
def download_from_url_file_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with urllib into a uniquely-named temporary file.

    Returns a dict (Type/Filename/Filesize/FilesizeAlt/Headers/HeadersSent/
    URL/Code plus DownloadTime info) or False when the request fails.
    sleep=-1 means "use the module-level default delay".
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Copy first so the shared default header dict is not polluted
        # with this URL's Authorization header on later calls.
        httpheaders = dict(httpheaders);
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders);
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    # NOTE(review): mirrored from the sibling implementations so all
    # *_with_* variants treat postdata the same way.
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl);
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata);
        else:
            geturls_text = geturls_opener.open(httpurl);
    except HTTPError as geturls_text_error:
        # HTTPError is file-like; keep it so headers/status still flow back.
        geturls_text = geturls_text_error;
        log.info("Error With URL "+httpurl);
    except URLError:
        log.info("Error With URL "+httpurl);
        return False;
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    downloadsize = geturls_text.info().get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard: Content-Length may be absent (0) — avoid ZeroDivisionError.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_urllib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl (via urllib) to outpath/outfile, or into memory
    when outfile == "-".

    buffersize is [download_chunk, copy_chunk]. Returns a result dict
    (Type "File" or "Content") or False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to treat a file as a directory, or a directory as a file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # BUGFIX: the former py3 branch skipped the failure guard; the
        # py2/py3 branches differed only in StringIO vs BytesIO, so they
        # are merged here with the guard applied to both.
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        if(sys.version[0]=="2"):
            f = StringIO();
        if(sys.version[0]>="3"):
            f = BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
def download_from_url_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with http.client/httplib and return the body in memory.

    Transparently inflates gzip/deflate (and brotli when available).
    Returns a dict (Type "Content") or False on failure.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Copy first so the shared default header dict is not mutated.
        httpheaders = dict(httpheaders);
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1]);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1]);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were sent with method "GET".
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    log.info("Downloading URL "+httpurl);
    if(dict(geturls_text.getheaders()).get("Content-Encoding")=="gzip" or dict(geturls_text.getheaders()).get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(dict(geturls_text.getheaders()).get("Content-Encoding")!="gzip" and dict(geturls_text.getheaders()).get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:];
    if(dict(geturls_text.getheaders()).get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.getheaders()), 'HeadersSent': httpheaders, 'URL': httpurl, 'Code': geturls_text.status};
    geturls_text.close();
    return returnval;
def download_from_url_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with http.client/httplib into a temporary file.

    Returns a dict (Type "File") describing the temp file, or False on
    failure. sleep=-1 means "use the module-level default delay".
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Copy first so the shared default header dict is not mutated.
        httpheaders = dict(httpheaders);
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnection(urlparts[1]);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnection(urlparts[1]);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were sent with method "GET".
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    downloadsize = dict(geturls_text.getheaders()).get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.getheaders()), 'HeadersSent': httpheaders, 'URL': httpurl, 'Code': geturls_text.status};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard: Content-Length may be absent (0) — avoid ZeroDivisionError.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
def download_from_url_to_file_with_httplib(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl (via http.client/httplib) to outpath/outfile, or
    into memory when outfile == "-".

    buffersize is [download_chunk, copy_chunk]. Returns a result dict
    (Type "File" or "Content") or False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to treat a file as a directory, or a directory as a file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # BUGFIX: the former py3 branch wrongly delegated to the *urllib*
        # downloader and skipped the failure guard; branches merged, both
        # now use the httplib downloader with the guard applied.
        pretmpfilename = download_from_url_file_with_httplib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        if(sys.version[0]=="2"):
            f = StringIO();
        if(sys.version[0]>="3"):
            f = BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with httplib2's timeout-aware connections and return
    the body in memory.

    Transparently inflates gzip/deflate (and brotli when available).
    Returns a dict (Type "Content") or False on failure.
    """
    global geturls_download_sleep, havebrotli;
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Copy first so the shared default header dict is not mutated.
        httpheaders = dict(httpheaders);
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1]);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were sent with method "GET".
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    log.info("Downloading URL "+httpurl);
    if(dict(geturls_text.getheaders()).get("Content-Encoding")=="gzip" or dict(geturls_text.getheaders()).get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read());
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read());
        gzstrbuf = gzip.GzipFile(fileobj=strbuf);
        returnval_content = gzstrbuf.read()[:];
    if(dict(geturls_text.getheaders()).get("Content-Encoding")!="gzip" and dict(geturls_text.getheaders()).get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:];
    if(dict(geturls_text.getheaders()).get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content);
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.getheaders()), 'HeadersSent': httpheaders, 'URL': httpurl, 'Code': geturls_text.status};
    geturls_text.close();
    return returnval;
if(not havehttplib2):
    def download_from_url_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        # BUGFIX: the delegated result was assigned but never returned.
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        return returnval
def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with httplib2's timeout-aware connections into a
    temporary file.

    Returns a dict (Type "File") describing the temp file, or False on
    failure. sleep=-1 means "use the module-level default delay".
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
    exec_time_start = time.time();
    # Hash url + buffersize + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1");
    if(sys.version[0]=="2"):
        myhash.update(httpurl);
        myhash.update(str(buffersize));
        myhash.update(str(exec_time_start));
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'));
        myhash.update(str(buffersize).encode('utf-8'));
        myhash.update(str(exec_time_start).encode('utf-8'));
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
    if(sleep<0):
        sleep = geturls_download_sleep;
    urlparts = urlparse.urlparse(httpurl);
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders);
    if(urlparts.username is not None or urlparts.password is not None):
        # Copy first so the shared default header dict is not mutated.
        httpheaders = dict(httpheaders);
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8");
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } );
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie));
    geturls_opener.addheaders = httpheaders;
    time.sleep(sleep);
    if(urlparts[0]=="http"):
        httpconn = HTTPConnectionWithTimeout(urlparts[1]);
    elif(urlparts[0]=="https"):
        httpconn = HTTPSConnectionWithTimeout(urlparts[1]);
    else:
        return False;
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata);
    try:
        if(httpmethod=="GET"):
            httpconn.request("GET", urlparts[2], headers=httpheaders);
        elif(httpmethod=="POST"):
            # BUGFIX: POST requests were sent with method "GET".
            httpconn.request("POST", urlparts[2], body=postdata, headers=httpheaders);
        else:
            httpconn.request("GET", urlparts[2], headers=httpheaders);
    except socket.timeout:
        log.info("Error With URL "+httpurl);
        return False;
    geturls_text = httpconn.getresponse();
    downloadsize = dict(geturls_text.getheaders()).get('Content-Length');
    if(downloadsize is not None):
        downloadsize = int(downloadsize);
    if downloadsize is None: downloadsize = 0;
    fulldatasize = 0;
    prevdownsize = 0;
    log.info("Downloading URL "+httpurl);
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name;
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.getheaders()), 'HeadersSent': httpheaders, 'URL': httpurl, 'Code': geturls_text.status};
        while True:
            databytes = geturls_text.read(buffersize);
            if not databytes: break;
            datasize = len(databytes);
            fulldatasize = datasize + fulldatasize;
            percentage = "";
            # Guard: Content-Length may be absent (0) — avoid ZeroDivisionError.
            if(downloadsize>0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
            downloaddiff = fulldatasize - prevdownsize;
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
            prevdownsize = fulldatasize;
            f.write(databytes);
    geturls_text.close();
    exec_time_end = time.time();
    # BUGFIX: elapsed time is end - start (was start - end, always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
    return returnval;
if(not havehttplib2):
    def download_from_url_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback when httplib2 is unavailable: delegate to urllib."""
        # BUGFIX: the delegated result was assigned but never returned.
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        return returnval
def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl (via httplib2) to outpath/outfile, or into memory
    when outfile == "-".

    buffersize is [download_chunk, copy_chunk]. Returns a result dict
    (Type "File" or "Content") or False on failure.
    """
    global geturls_download_sleep;
    if(sleep<0):
        sleep = geturls_download_sleep;
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep);
        filepath = os.path.realpath(outpath+os.path.sep+outfile);
        if(not os.path.exists(outpath)):
            os.makedirs(outpath);
        # Refuse to treat a file as a directory, or a directory as a file.
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False;
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False;
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        log.info("Moving file "+tmpfilename+" to "+filepath);
        exec_time_start = time.time();
        shutil.move(tmpfilename, filepath);
        exec_time_end = time.time();
        # BUGFIX: elapsed time is end - start (was start - end, negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename);
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    if(outfile=="-"):
        # BUGFIX: the former py3 branch wrongly delegated to the *urllib*
        # downloader and skipped the failure guard; branches merged, both
        # now use the httplib2 downloader with the guard applied.
        pretmpfilename = download_from_url_file_with_httplib2(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep);
        if(not pretmpfilename):
            return False;
        tmpfilename = pretmpfilename['Filename'];
        downloadsize = os.path.getsize(tmpfilename);
        fulldatasize = 0;
        prevdownsize = 0;
        exec_time_start = time.time();
        if(sys.version[0]=="2"):
            f = StringIO();
        if(sys.version[0]>="3"):
            f = BytesIO();
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1]);
                if not databytes: break;
                datasize = len(databytes);
                fulldatasize = datasize + fulldatasize;
                percentage = "";
                if(downloadsize>0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
                downloaddiff = fulldatasize - prevdownsize;
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
                prevdownsize = fulldatasize;
                f.write(databytes);
        fdata = f.getvalue();
        f.close();
        os.remove(tmpfilename);
        exec_time_end = time.time();
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
    return returnval;
if(not havehttplib2):
    def download_from_url_to_file_with_httplib2(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when httplib2 is unavailable: delegate the whole
        download-to-file operation to the urllib implementation.

        BUG FIX: the call previously forwarded its trailing arguments
        positionally in an order (postdata, buffersize, outfile, outpath,
        sleep) that does not match the (postdata, outfile, outpath,
        buffersize, sleep) order used by the sibling to_file signatures;
        forwarding by keyword is correct regardless of the callee's
        positional layout.
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_from_url_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with urllib's Request/urlopen machinery.

    Returns a dict {'Type': "Content", 'Content': <body bytes>, 'Headers',
    'HeadersSent', 'URL', 'Code'} on success, or False when the connection
    fails outright (URLError / socket.timeout). An HTTPError response is
    still returned as content, since the error object carries usable
    headers and a body.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    # BUG FIX: was urllib.request.install_opener(...), which raises
    # AttributeError on Python 2; install_opener itself is imported for
    # both interpreter lines at the top of the file.
    install_opener(geturls_opener)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        # The Request object is identical for every method; only POST
        # attaches a body.
        geturls_request = Request(httpurl, headers=httpheaders)
        if(httpmethod=="POST"):
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            # GET and any unrecognized method fall back to a plain open.
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    if(geturls_text.headers.get("Content-Encoding")=="gzip" or geturls_text.headers.get("Content-Encoding")=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.read())
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.read())
        # NOTE(review): GzipFile only decodes gzip streams; a raw "deflate"
        # body would need zlib -- confirm whether any server sends one.
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.headers.get("Content-Encoding")!="gzip" and geturls_text.headers.get("Content-Encoding")!="deflate"):
        returnval_content = geturls_text.read()[:]
    if(geturls_text.headers.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
    geturls_text.close()
    return returnval
def download_from_url_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl into a uniquely-named temporary file using urllib.

    Returns a dict with 'Type': "File", the temporary 'Filename', sizes,
    response headers, URL and status code, plus timing info; returns False
    on connection failure. The caller owns (and must remove) the temp file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash URL + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    geturls_opener = build_opener(HTTPCookieProcessor(httpcookie))
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    # BUG FIX: urllib.request.install_opener fails on Python 2; the
    # directly imported install_opener works on both interpreter lines.
    install_opener(geturls_opener)
    httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        geturls_request = Request(httpurl, headers=httpheaders)
        if(httpmethod=="POST"):
            geturls_text = urlopen(geturls_request, data=postdata)
        else:
            geturls_text = urlopen(geturls_request)
    except HTTPError as geturls_text_error:
        # Error responses still carry headers/body; keep downloading them.
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Only report a percentage when the server sent a length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start; the old start - end was negative.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_request(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via urllib's Request machinery either to
    outpath/outfile (returns a 'File' dict) or, when outfile is "-", into
    memory (returns a 'Content' dict). Returns False when the underlying
    download fails or the destination path is unusable.

    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath must be a directory.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # destination must not be an existing directory.
            return False
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations are end - start (the old start - end was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # The old code duplicated this branch per Python major version; the
        # only difference was StringIO vs BytesIO, chosen here instead.
        pretmpfilename = download_from_url_file_with_request(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the Python 3 branch previously skipped this failure check.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the requests package.

    Returns a dict {'Type': "Content", 'Content': <body bytes>, 'Headers',
    'HeadersSent', 'URL', 'Code'} or False on connect timeout.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    # NOTE(review): the sibling urllib version keys this off Content-Encoding,
    # not Content-Type, and requests already decodes gzip/deflate into
    # .content -- confirm before changing the condition.
    if(geturls_text.headers.get('Content-Type')=="gzip" or geturls_text.headers.get('Content-Type')=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content)
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        # BUG FIX: GzipFile has no .content attribute (AttributeError);
        # the decompressed bytes come from read().
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.headers.get('Content-Type')!="gzip" and geturls_text.headers.get('Content-Type')!="deflate"):
        returnval_content = geturls_text.content[:]
    if(geturls_text.headers.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
    geturls_text.close()
    return returnval
if(not haverequests):
    def download_from_url_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Stand-in used when the requests package is missing; forwards the
        call unchanged to the urllib implementation."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl into a uniquely-named temporary file using requests.

    Returns a dict with 'Type': "File", the temporary 'Filename', sizes,
    response headers, URL and status code, plus timing info; returns False
    on connect timeout. The caller owns (and must remove) the temp file.

    BUG FIX: httpheaders/httpcookie now carry the same geturls_headers /
    geturls_cj defaults as every sibling downloader (they were mandatory
    positional parameters here only).
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash URL + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = requests.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
    except requests.exceptions.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUG FIX: int(headers.get('Content-Length')) raised TypeError when the
    # server omitted the header, making the None checks below dead code;
    # convert only when the header is present.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        for databytes in geturls_text.iter_content(chunk_size=buffersize):
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Only report a percentage when the server sent a length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start; the old start - end was negative.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not haverequests):
    def download_from_url_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Stand-in used when the requests package is missing; forwards the
        call unchanged to the urllib implementation."""
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via requests either to outpath/outfile (returns a
    'File' dict) or, when outfile is "-", into memory (returns a 'Content'
    dict). Returns False when the underlying download fails or the
    destination path is unusable.

    buffersize is [download_chunk, copy_chunk].
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            # outpath must be a directory.
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            # destination must not be an existing directory.
            return False
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations are end - start (the old start - end was negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # The old code duplicated this branch per Python major version; the
        # only difference was StringIO vs BytesIO, chosen here instead.
        pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        # BUG FIX: the Python 3 branch previously skipped this failure check.
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUG FIX: 'HeadersSent' previously stored the literal list
        # ['HeadersSent'] instead of the headers from the download step.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not haverequests):
    def download_from_url_to_file_with_requests(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when requests is unavailable: delegate the whole
        download-to-file operation to the urllib implementation.

        BUG FIX: arguments are forwarded by keyword; the old positional call
        passed buffersize into the callee's outfile slot (the sibling
        to_file signatures take postdata, outfile, outpath, buffersize,
        sleep in that order).
        """
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
        return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the httpx package.

    Returns a dict {'Type': "Content", 'Content': <body bytes>, 'Headers',
    'HeadersSent', 'URL', 'Code'} or False on connect timeout.
    """
    global geturls_download_sleep, havebrotli
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        # Credentials embedded in the URL become a Basic auth header.
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = httpx.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            # GET and any unrecognized method fall back to a plain GET.
            geturls_text = httpx.get(httpurl, headers=httpheaders, cookies=httpcookie)
    # BUG FIX: this is the httpx code path; catching
    # requests.exceptions.ConnectTimeout here raised NameError whenever
    # requests was not installed.
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    log.info("Downloading URL "+httpurl)
    # NOTE(review): the sibling urllib version keys this off
    # Content-Encoding, not Content-Type, and httpx already decodes
    # gzip/deflate into .content -- confirm before changing the condition.
    if(geturls_text.headers.get('Content-Type')=="gzip" or geturls_text.headers.get('Content-Type')=="deflate"):
        if(sys.version[0]=="2"):
            strbuf = StringIO(geturls_text.content)
        if(sys.version[0]>="3"):
            strbuf = BytesIO(geturls_text.content)
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        # BUG FIX: GzipFile has no .content attribute (AttributeError);
        # the decompressed bytes come from read().
        returnval_content = gzstrbuf.read()[:]
    if(geturls_text.headers.get('Content-Type')!="gzip" and geturls_text.headers.get('Content-Type')!="deflate"):
        returnval_content = geturls_text.content[:]
    if(geturls_text.headers.get("Content-Encoding")=="br" and havebrotli):
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
    geturls_text.close()
    return returnval
def download_from_url_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Stand-in used when the httpx package is missing; forwards the call
    unchanged to the urllib implementation."""
    return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl into a uniquely-named temporary file using httpx.

    Returns a dict with 'Type': "File", the temporary 'Filename', sizes,
    response headers, URL and status code, plus timing info; returns False
    on connect timeout. The caller owns (and must remove) the temp file.

    BUG FIX: httpheaders/httpcookie now carry the same geturls_headers /
    geturls_cj defaults as every sibling downloader.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Hash URL + buffer size + start time into a unique temp-file suffix.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update( { 'Authorization': "Basic "+inurlencode } )
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="POST"):
            geturls_text = httpx.post(httpurl, data=postdata, headers=httpheaders, cookies=httpcookie)
        else:
            geturls_text = httpx.get(httpurl, headers=httpheaders, cookies=httpcookie)
    # BUG FIX: this is the httpx code path; catching requests.exceptions
    # here raised NameError whenever requests was not installed.
    except httpx.ConnectTimeout:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUG FIX: int(headers.get('Content-Length')) raised TypeError when the
    # server omitted the header; convert only when it is present.
    downloadsize = geturls_text.headers.get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        # BUG FIX: httpx responses have no iter_content (that is the
        # requests API); httpx chunked iteration is iter_bytes.
        for databytes in geturls_text.iter_bytes():
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if(downloadsize > 0):
                # Only report a percentage when the server sent a length.
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start; the old start - end was negative.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Stand-in used when the httpx package is missing; forwards the call
    unchanged to the urllib implementation."""
    return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the httpx backend and either move the temp file to
    outpath/outfile, or (when outfile == "-") return its bytes in memory.

    buffersize is [download_chunk, copy_chunk].  Returns a result dict on
    success, False on failure.
    NOTE(review): several interior statements (try/while/return lines) were
    lost in extraction; reconstructed from the parallel request3/urllib3
    implementations in this file -- confirm against VCS.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    returnval = False
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        # refuse a file where the directory should be, or a directory where
        # the target file should be
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (the original logged start - end,
        # a negative duration)
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # The original had two near-identical branches for Python 2 and 3
        # differing only in the in-memory buffer type; merged here.
        pretmpfilename = download_from_url_file_with_httpx(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            # ROBUSTNESS: the original Python-3 branch lacked this guard
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0] == "2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        # BUG FIX: end - start, as above
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        # BUG FIX: the original Python-2 branch set 'HeadersSent' to the
        # literal list ['HeadersSent'] instead of the headers actually sent.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
def download_from_url_to_file_with_httpx(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Fallback for the httpx backend: delegate to the urllib implementation.

    Returns the delegate's result dict, or False on failure.
    NOTE(review): in the original layout this def was presumably guarded by
    an "if(not havehttpx):" check lost in extraction -- confirm against VCS.
    """
    # BUG FIX: the original passed buffersize/outfile/outpath positionally in
    # an order that does not match the delegate's (outfile, outpath,
    # buffersize) parameter order, and it never returned the result.  Use
    # keyword arguments and return the value.
    return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the urllib3 PoolManager.request API and return
    {'Type', 'Content', 'Headers', 'HeadersSent', 'URL', 'Code'}, or False
    when the request fails.

    Credentials embedded in the URL become a Basic Authorization header;
    gzip/deflate (and, when available, brotli) response bodies are decoded.
    NOTE(review): try/return lines were lost in extraction; reconstructed
    from the parallel urllib3 implementation below -- confirm against VCS.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        # BUG FIX: dropped the redundant "geturls_text = geturls_text ="
        # double assignment; unknown methods fall back to GET as before.
        if httpmethod == "POST":
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL " + httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    log.info("Downloading URL " + httpurl)
    httpencoding = geturls_text.info().get("Content-Encoding")
    if httpencoding == "gzip" or httpencoding == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpencoding != "gzip" and httpencoding != "deflate":
        returnval_content = geturls_text.read()[:]
    if httpencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'HeadersSent': httpheaders, 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if not haveurllib3:
    def download_from_url_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when urllib3 is missing: delegate to the urllib backend."""
        # BUG FIX: return the delegate's result instead of discarding it
        # (the original implicitly returned None).
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Stream httpurl (urllib3 PoolManager.request API) into a uniquely-named
    temporary file kept on disk (delete=False; the caller moves/removes it).

    Returns a dict describing the file ('Filename', 'Filesize', 'Headers',
    'DownloadTime', ...), or False when the request fails.
    NOTE(review): try/while/return lines were lost in extraction;
    reconstructed from the parallel urllib3 implementation -- confirm.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # unique temp-file suffix derived from the URL, chunk size and start time
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        # BUG FIX: dropped the redundant double assignment
        if httpmethod == "POST":
            geturls_text = urllib_pool.request("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.request("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL " + httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    # BUG FIX: int(headers.get('Content-Length')) raised TypeError when the
    # header was absent, making the following None checks unreachable;
    # convert only when present, default to 0 (unknown size).
    downloadsize = geturls_text.headers.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    else:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'HeadersSent': httpheaders, 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                # guard keeps an unknown (0) size from dividing by zero
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (the original logged a negative value)
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not haveurllib3:
    def download_from_url_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when urllib3 is missing: delegate to the urllib backend."""
        # BUG FIX: return the delegate's result instead of discarding it.
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the urllib3 request backend and either move the
    temp file to outpath/outfile, or (outfile == "-") return its bytes.

    buffersize is [download_chunk, copy_chunk].  Returns a result dict on
    success, False on failure.
    NOTE(review): interior statements lost in extraction were reconstructed
    from the sibling implementations -- confirm against VCS.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    returnval = False
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        # refuse a file where the directory should be, or a directory where
        # the target file should be
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (original logged a negative value)
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # The original duplicated this branch for Python 2 and 3, differing
        # only in the in-memory buffer type; merged here.
        pretmpfilename = download_from_url_file_with_request3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            # ROBUSTNESS: the original Python-3 branch lacked this guard
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0] == "2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if not haveurllib3:
    def download_from_url_to_file_with_request3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when urllib3 is missing: delegate to the urllib backend."""
        # BUG FIX: the original passed buffersize/outfile/outpath positionally
        # in an order that does not match the delegate's (outfile, outpath,
        # buffersize) parameter order, and it never returned the result.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with the urllib3 PoolManager.urlopen API and return
    {'Type', 'Content', 'Headers', 'HeadersSent', 'URL', 'Code'}, or False
    when the request fails.

    NOTE(review): try/return lines were lost in extraction; reconstructed
    from the parallel request3 implementation above -- confirm against VCS.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "POST":
            # BUG FIX: the POST branch called urlopen("GET", ...), sending the
            # body with the wrong HTTP method; use "POST".
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL " + httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    log.info("Downloading URL " + httpurl)
    httpencoding = geturls_text.info().get("Content-Encoding")
    if httpencoding == "gzip" or httpencoding == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpencoding != "gzip" and httpencoding != "deflate":
        returnval_content = geturls_text.read()[:]
    if httpencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'HeadersSent': httpheaders, 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
    geturls_text.close()
    return returnval
if not haveurllib3:
    def download_from_url_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when urllib3 is missing: delegate to the urllib backend."""
        # BUG FIX: return the delegate's result instead of discarding it.
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Stream httpurl (urllib3 PoolManager.urlopen API) into a uniquely-named
    temporary file kept on disk (delete=False; the caller moves/removes it).

    Returns a dict describing the file, or False when the request fails.
    NOTE(review): try/while/return lines were lost in extraction;
    reconstructed from the parallel request3 implementation -- confirm.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # unique temp-file suffix derived from the URL, chunk size and start time
    myhash = hashlib.new("sha1")
    if sys.version[0] == "2":
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if sys.version[0] >= "3":
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    time.sleep(sleep)
    urllib_pool = urllib3.PoolManager(headers=httpheaders)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "POST":
            # BUG FIX: the POST branch called urlopen("GET", ...); use "POST".
            geturls_text = urllib_pool.urlopen("POST", httpurl, body=postdata, headers=httpheaders, preload_content=False)
        else:
            geturls_text = urllib_pool.urlopen("GET", httpurl, headers=httpheaders, preload_content=False)
    except urllib3.exceptions.ConnectTimeoutError:
        log.info("Error With URL " + httpurl)
        return False
    except urllib3.exceptions.MaxRetryError:
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    # BUG FIX: int(headers.get('Content-Length')) raised TypeError when the
    # header was absent; convert only when present, default to 0.
    downloadsize = geturls_text.headers.get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    else:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL " + httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'HeadersSent': httpheaders, 'URL': geturls_text.geturl(), 'Code': geturls_text.status}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                # guard keeps an unknown (0) size from dividing by zero
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Downloaded " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: elapsed time is end - start (original logged a negative value)
    log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if not haveurllib3:
    def download_from_url_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when urllib3 is missing: delegate to the urllib backend."""
        # BUG FIX: return the delegate's result instead of discarding it.
        return download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl via the urllib3 backend and either move the temp file
    to outpath/outfile, or (outfile == "-") return its bytes in memory.

    buffersize is [download_chunk, copy_chunk].  Returns a result dict on
    success, False on failure.
    NOTE(review): interior statements lost in extraction were reconstructed
    from the sibling implementations -- confirm against VCS.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    returnval = False
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath + os.path.sep + outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        # refuse a file where the directory should be, or a directory where
        # the target file should be
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file " + tmpfilename + " to " + filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: elapsed time is end - start (original logged a negative value)
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if outfile == "-":
        # The original duplicated this branch for Python 2 and 3, differing
        # only in the in-memory buffer type; merged here.
        pretmpfilename = download_from_url_file_with_urllib3(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if not pretmpfilename:
            # ROBUSTNESS: the original Python-3 branch lacked this guard
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0] == "2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.') + "%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying " + get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix'] + " / " + get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix'] + " " + str(percentage) + " / Copied " + get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took " + hms_string(exec_time_end - exec_time_start) + " to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if not haveurllib3:
    def download_from_url_to_file_with_urllib3(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when urllib3 is missing: delegate to the urllib backend."""
        # BUG FIX: the original passed buffersize/outfile/outpath positionally
        # in an order that does not match the delegate's (outfile, outpath,
        # buffersize) parameter order, and it never returned the result.
        return download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep)
def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
    """Fetch httpurl with a mechanize.Browser and return {'Type', 'Content',
    'Headers', 'HeadersSent', 'URL', 'Code'}, or False on failure.

    An HTTP error response is kept and processed like a normal response, so
    its body/headers/status are still returned to the caller.
    NOTE(review): try/return lines were lost in extraction; reconstructed
    from the sibling backends -- confirm against VCS.
    """
    global geturls_download_sleep, havebrotli
    if sleep < 0:
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if isinstance(httpheaders, list):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    if urlparts.username is not None or urlparts.password is not None:
        inurlencode = b64encode(str(urlparts.username + ":" + urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic " + inurlencode})
    geturls_opener = mechanize.Browser()
    # mechanize expects header *pairs*, so convert a dict back to a list
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if postdata is not None and not isinstance(postdata, dict):
        postdata = urlencode(postdata)
    try:
        if httpmethod == "POST":
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        # error responses still carry usable content/headers; keep them
        geturls_text = geturls_text_error
        log.info("Error With URL " + httpurl)
    except URLError:  # NOTE(review): exception type lost in extraction -- confirm
        log.info("Error With URL " + httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL " + httpurl)
        return False
    log.info("Downloading URL " + httpurl)
    httpencoding = geturls_text.info().get("Content-Encoding")
    if httpencoding == "gzip" or httpencoding == "deflate":
        if sys.version[0] == "2":
            strbuf = StringIO(geturls_text.read())
        if sys.version[0] >= "3":
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    if httpencoding != "gzip" and httpencoding != "deflate":
        returnval_content = geturls_text.read()[:]
    if httpencoding == "br" and havebrotli:
        returnval_content = brotli.decompress(returnval_content)
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
    geturls_text.close()
    return returnval
if(not havemechanize):
    def download_from_url_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, sleep=-1):
        """Fallback used when mechanize is not installed: delegate to the
        urllib implementation with identical arguments."""
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, sleep)
        # Propagate the delegate's result (the extracted source had lost this).
        return returnval
def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
    """Download httpurl with mechanize into a uniquely-named temporary file.

    Returns a dict {'Type': "File", 'Filename', 'Filesize', 'FilesizeAlt',
    'Headers', 'HeadersSent', 'URL', 'Code', 'DownloadTime',
    'DownloadTimeReadable'} or False on connection failure.
    The caller is responsible for removing the temporary file.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the url, buffer size and start time.
    myhash = hashlib.new("sha1")
    if(sys.version[0]=="2"):
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    if(sys.version[0]>="3"):
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if(sleep < 0):
        sleep = geturls_download_sleep
    urlparts = urlparse.urlparse(httpurl)
    if(isinstance(httpheaders, list)):
        httpheaders = make_http_headers_from_list_to_dict(httpheaders)
    # Inline credentials (user:pass@host) become a Basic Authorization header.
    if(urlparts.username is not None or urlparts.password is not None):
        inurlencode = b64encode(str(urlparts.username+":"+urlparts.password).encode()).decode("UTF-8")
        httpheaders.update({'Authorization': "Basic "+inurlencode})
    geturls_opener = mechanize.Browser()
    if(isinstance(httpheaders, dict)):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    time.sleep(sleep)
    geturls_opener.addheaders = httpheaders
    geturls_opener.set_cookiejar(httpcookie)
    geturls_opener.set_handle_robots(False)
    if(postdata is not None and not isinstance(postdata, dict)):
        postdata = urlencode(postdata)
    try:
        if(httpmethod=="GET"):
            geturls_text = geturls_opener.open(httpurl)
        elif(httpmethod=="POST"):
            geturls_text = geturls_opener.open(httpurl, data=postdata)
        else:
            geturls_text = geturls_opener.open(httpurl)
    except mechanize.HTTPError as geturls_text_error:
        geturls_text = geturls_text_error
        log.info("Error With URL "+httpurl)
    except URLError:
        log.info("Error With URL "+httpurl)
        return False
    except socket.timeout:
        log.info("Error With URL "+httpurl)
        return False
    # BUGFIX: int() was applied before the None check, so a response without
    # a Content-Length header raised TypeError instead of falling back to 0.
    downloadsize = geturls_text.info().get('Content-Length')
    if(downloadsize is not None):
        downloadsize = int(downloadsize)
    if downloadsize is None: downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'HeadersSent': make_http_headers_from_list_to_dict(httpheaders), 'URL': geturls_text.geturl(), 'Code': geturls_text.code}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes: break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            # Guard the progress percentage against an unknown (0) total size.
            if(downloadsize > 0):
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUGFIX: elapsed time is end - start; the original subtracted the other
    # way around and reported a negative duration.
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
if(not havemechanize):
    def download_from_url_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, buffersize=524288, sleep=-1):
        """Fallback used when mechanize is not installed: delegate the
        download-to-temp-file operation to the urllib implementation."""
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, sleep)
        # Propagate the delegate's result (the extracted source had lost this).
        return returnval
def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download httpurl with mechanize to outpath/outfile, or — when outfile
    is "-" — into memory, returning the content inside the result dict.

    buffersize is a two-item list: [network-read chunk, file-copy chunk].
    Returns a result dict ('Type' is "File" or "Content") or False on failure.
    """
    global geturls_download_sleep
    if(sleep < 0):
        sleep = geturls_download_sleep
    if(not outfile=="-"):
        # Download to a temp file, then move it into its final place.
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if(not os.path.exists(outpath)):
            os.makedirs(outpath)
        if(os.path.exists(outpath) and os.path.isfile(outpath)):
            return False
        if(os.path.exists(filepath) and os.path.isdir(filepath)):
            return False
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUGFIX: elapsed time is end - start, not start - end.
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if(os.path.exists(tmpfilename)):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    if(outfile=="-"):
        # Stream the temp file back into memory and delete it. The original
        # had two near-identical branches for Python 2 (StringIO) and 3
        # (BytesIO); they are merged here, behavior unchanged.
        pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize[0], sleep)
        if(not pretmpfilename):
            # BUGFIX: the Python-3 branch lacked this failure check and would
            # have raised TypeError on a False result.
            return False
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        f = StringIO() if sys.version[0]=="2" else BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes: break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                # Guard against an unknown (0) total size.
                if(downloadsize > 0):
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        f.seek(0)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        # BUGFIX: 'HeadersSent' previously stored the literal list
        # ['HeadersSent'] instead of pretmpfilename['HeadersSent'].
        # NOTE(review): the 'MoveFileTime' keys actually hold the copy time in
        # this branch; key names kept for caller compatibility.
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'HeadersSent': pretmpfilename['HeadersSent'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if(not havemechanize):
    def download_from_url_to_file_with_mechanize(httpurl, httpheaders=geturls_headers, httpcookie=geturls_cj, httpmethod="GET", postdata=None, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
        """Fallback used when mechanize is not installed: delegate the
        download-to-file operation to the urllib implementation."""
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, httpmethod, postdata, buffersize, outfile, outpath, sleep)
        # Propagate the delegate's result (the extracted source had lost this).
        return returnval
def download_file_from_ftp_file(url):
    """Download url (ftp:// or ftps://) and return its contents as a BytesIO
    rewound to position 0, or False for any other scheme.

    FTPS connections are switched to a protected data channel via prot_p().
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    ftp.connect(urlparts.hostname, urlparts.port)
    # NOTE(review): login uses the raw URL fields; the ftp_username /
    # ftp_password anonymous fallbacks computed above are not applied here —
    # kept as-is to preserve the original behavior. TODO confirm intent.
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()
    ftpfile = BytesIO()
    ftp.retrbinary("RETR "+urlparts.path, ftpfile.write)
    #ftp.storbinary("STOR "+urlparts.path, ftpfile.write);
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def download_file_from_ftp_string(url):
    """Return the remote FTP file's bytes, or False if the download failed.

    BUGFIX: download_file_from_ftp_file() returns False for unsupported
    schemes; calling .read() on that raised AttributeError before the guard.
    """
    ftpfile = download_file_from_ftp_file(url)
    if(not ftpfile):
        return False
    return ftpfile.read()
def upload_file_to_ftp_file(ftpfile, url):
    """Upload the file object ftpfile to url (ftp:// or ftps://).

    Returns the same file object rewound to position 0, or False for any
    other scheme. FTPS data channels are protected via prot_p().
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.username is not None):
        ftp_username = urlparts.username
    else:
        ftp_username = "anonymous"
    if(urlparts.password is not None):
        ftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        ftp_password = "anonymous"
    else:
        ftp_password = ""
    if(urlparts.scheme=="ftp"):
        ftp = FTP()
    elif(urlparts.scheme=="ftps"):
        ftp = FTP_TLS()
    else:
        return False
    ftp.connect(urlparts.hostname, urlparts.port)
    # NOTE(review): login uses the raw URL fields; the anonymous fallbacks
    # computed above are not applied — kept to preserve original behavior.
    ftp.login(urlparts.username, urlparts.password)
    if(urlparts.scheme=="ftps"):
        ftp.prot_p()
    ftp.storbinary("STOR "+urlparts.path, ftpfile)
    ftp.close()
    ftpfile.seek(0, 0)
    return ftpfile
def upload_file_to_ftp_string(ftpstring, url):
    """Upload the byte string ftpstring to url via FTP/FTPS.

    NOTE(review): the helper returns the same BytesIO it was given, which is
    then closed here — so the returned handle is closed (or False on
    failure). Preserved as the original behavior; callers appear to use the
    return only as a truthiness flag. TODO confirm.
    """
    ftpfileo = BytesIO(ftpstring)
    ftpfile = upload_file_to_ftp_file(ftpfileo, url)
    ftpfileo.close()
    return ftpfile
def download_file_from_sftp_file(url):
    """Download url (sftp://) via paramiko and return a BytesIO rewound to
    position 0; False for non-sftp schemes or on SSH connection failure."""
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL does not name one
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): connects with the raw URL fields; the anonymous
        # fallbacks above are computed but not applied — original behavior.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    sftp = ssh.open_sftp()
    sftpfile = BytesIO()
    sftp.getfo(urlparts.path, sftpfile)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def download_file_from_sftp_file(url):
    """Stub used when paramiko is unavailable: always returns False.

    NOTE(review): in the full file this definition sits under an
    `if(not haveparamiko):` guard — confirm the guard survives editing.
    """
    return False
def download_file_from_sftp_string(url):
    """Return the remote SFTP file's bytes, or False if the download failed.

    BUGFIX: download_file_from_sftp_file() returns False on failure, which
    has no .read(); guard before reading.
    """
    sftpfile = download_file_from_sftp_file(url)
    if(not sftpfile):
        return False
    return sftpfile.read()
def download_file_from_sftp_string(url):
    """Stub used when paramiko is unavailable: always returns False.

    BUGFIX: this fallback was misnamed download_file_from_ftp_string, which
    clobbered the working FTP helper whenever paramiko was missing; it is the
    SFTP string helper that needs the stub.
    NOTE(review): guarded by `if(not haveparamiko):` in the full file.
    """
    return False
def upload_file_to_sftp_file(sftpfile, url):
    """Upload the file object sftpfile to url (sftp://) via paramiko.

    Returns the same file object rewound to position 0; False for non-sftp
    schemes or on SSH connection failure.
    """
    urlparts = urlparse.urlparse(url)
    file_name = os.path.basename(urlparts.path)
    file_dir = os.path.dirname(urlparts.path)
    if(urlparts.port is None):
        sftp_port = 22  # default SSH port when the URL does not name one
    else:
        sftp_port = urlparts.port
    if(urlparts.username is not None):
        sftp_username = urlparts.username
    else:
        sftp_username = "anonymous"
    if(urlparts.password is not None):
        sftp_password = urlparts.password
    elif(urlparts.password is None and urlparts.username=="anonymous"):
        sftp_password = "anonymous"
    else:
        sftp_password = ""
    if(urlparts.scheme!="sftp"):
        return False
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        # NOTE(review): connects with the raw URL fields; the anonymous
        # fallbacks above are computed but not applied — original behavior.
        ssh.connect(urlparts.hostname, port=sftp_port, username=urlparts.username, password=urlparts.password)
    except paramiko.ssh_exception.SSHException:
        return False
    sftp = ssh.open_sftp()
    sftp.putfo(sftpfile, urlparts.path)
    sftp.close()
    ssh.close()
    sftpfile.seek(0, 0)
    return sftpfile
def upload_file_to_sftp_file(sftpfile, url):
    """Stub used when paramiko is unavailable: always returns False.

    NOTE(review): in the full file this definition sits under an
    `if(not haveparamiko):` guard — confirm the guard survives editing.
    """
    return False
def upload_file_to_sftp_string(sftpstring, url):
    """Upload the byte string sftpstring to url via SFTP and return the
    source file object (False on failure).

    BUGFIX: the original called the nonexistent upload_file_to_sftp_files()
    and passed the undefined name ftpfileo, raising NameError at call time.
    NOTE(review): mirrors upload_file_to_ftp_string, which closes the buffer
    before returning it — preserved for consistency.
    """
    sftpfileo = BytesIO(sftpstring)
    sftpfile = upload_file_to_sftp_file(sftpfileo, url)
    sftpfileo.close()
    return sftpfile
2694 def upload_file_to_sftp_string(url
):