4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the Revised BSD License.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 Revised BSD License for more details.
12 Copyright 2016 Cool Dude 2k - http://idb.berlios.de/
13 Copyright 2016 Game Maker 2k - http://intdb.sourceforge.net/
14 Copyright 2016 Kazuki Przyborowski - https://github.com/KazukiPrzyborowski
16 $FileInfo: pywwwget.py - Last Update: 6/17/2016 Ver. 0.4.7 RC 1 - Author: cooldude2k $
19 from __future__
import division
, absolute_import
, print_function
;
20 import re
, os
, sys
, hashlib
, shutil
, platform
, tempfile
, urllib
, gzip
, time
, argparse
, cgi
, subprocess
, imp
;
21 import logging
as log
;
# Optional third-party HTTP backends: probe availability at import time so
# the download functions can fall back to urllib when a backend is missing.
# NOTE(review): replaces the original imp.find_module() probing (the imp
# module was removed in Python 3.12) with direct try-imports; the visible
# fragment also assigned havemechanize in the requests probe, which would
# leave haverequests undefined -- fixed here.
try:
    import requests
    haverequests = True
except ImportError:
    haverequests = False
try:
    import mechanize
    havemechanize = True
except ImportError:
    havemechanize = False
36 if(sys
.version
[0]=="2"):
38 from cStringIO
import StringIO
;
40 from StringIO
import StringIO
;
41 # From http://python-future.org/compatible_idioms.html
42 from urlparse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
;
43 from urllib
import urlencode
;
44 from urllib2
import urlopen
, Request
, HTTPError
;
45 import urllib2
, urlparse
, cookielib
;
46 if(sys
.version
[0]>="3"):
47 from io
import StringIO
, BytesIO
;
48 # From http://python-future.org/compatible_idioms.html
49 from urllib
.parse
import urlparse
, urlunparse
, urlsplit
, urlunsplit
, urljoin
, urlencode
;
50 from urllib
.request
import urlopen
, Request
;
51 from urllib
.error
import HTTPError
;
52 import urllib
.request
as urllib2
;
53 import urllib
.parse
as urlparse
;
54 import http
.cookiejar
as cookielib
;
__program_name__ = "PyWWW-Get"
__project__ = __program_name__
__project_url__ = "https://github.com/GameMaker2k/PyWWW-Get"
# (major, minor, micro, release tag or None, RC number or None)
__version_info__ = (0, 4, 7, "RC 1", 1)
# (year, month, day, release tag or None, RC number or None)
__version_date_info__ = (2016, 6, 17, "RC 1", 1)
__version_date__ = str(__version_date_info__[0])+"."+str(__version_date_info__[1]).zfill(2)+"."+str(__version_date_info__[2]).zfill(2)
# Use "is (not) None" instead of the original ==None/!=None comparisons and
# fold each paired if/if into if/else.
if(__version_info__[4] is not None):
    __version_date_plusrc__ = __version_date__+"-"+str(__version_date_info__[4])
else:
    __version_date_plusrc__ = __version_date__
if(__version_info__[3] is not None):
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])+" "+str(__version_info__[3])
else:
    __version__ = str(__version_info__[0])+"."+str(__version_info__[1])+"."+str(__version_info__[2])
# Prefix for NamedTemporaryFile names, e.g. "py3wwwget0-" on Python 3.
tmpfileprefix = "py" + str(sys.version_info[0]) + "wwwget" + str(__version_info__[0]) + "-"
# System temporary directory used for intermediate downloads.
pytempdir = tempfile.gettempdir()
# Shared cookie jar used by the urllib-based download functions.
geturls_cj = cookielib.CookieJar()
# Browser User-Agent presets (all "Windows 7" variants).
geturls_ua_firefox_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"
geturls_ua_seamonkey_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 SeaMonkey/2.49.3"
geturls_ua_chrome_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"
geturls_ua_chromium_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chromium/67.0.3396.99 Chrome/67.0.3396.99 Safari/537.36"
geturls_ua_midori_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/538.15 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/538.15 Midori/0.5"
geturls_ua_palemoon_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.9) Gecko/20100101 Goanna/3.4 Firefox/52.9 PaleMoon/27.9.3"
geturls_ua_opera_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36 OPR/54.0.2952.54"
geturls_ua_vivaldi_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36 Vivaldi/1.2.490.43"
geturls_ua_internet_explorer_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko"
geturls_ua_microsoft_edge_windows7 = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/17.17134"
geturls_ua_pywwwget_python = "Mozilla/5.0 (compatible; {proname}/{prover}; +{prourl})".format(proname=__project__, prover=__version__, prourl=__project_url__)
# platform.python_implementation() may return ""; substitute "Python" then.
# (Replaces the original's two duplicated if-branch format() calls.)
geturls_ua_pywwwget_python_alt = "Mozilla/5.0 ({osver}; {archtype}; +{prourl}) {pyimp}/{pyver} (KHTML, like Gecko) {proname}/{prover}".format(osver=platform.system()+" "+platform.release(), archtype=platform.machine(), prourl=__project_url__, pyimp=(platform.python_implementation() or "Python"), pyver=platform.python_version(), proname=__project__, prover=__version__)
geturls_ua_googlebot_google = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
geturls_ua_googlebot_google_old = "Googlebot/2.1 (+http://www.google.com/bot.html)"
geturls_ua = geturls_ua_firefox_windows7

def _make_geturls_headers(useragent, acceptencoding):
    # All header presets are identical apart from User-Agent/Accept-Encoding;
    # build them in one place instead of 13 copy-pasted dict literals.
    return {'Referer': "http://google.com/", 'User-Agent': useragent, 'Accept-Encoding': acceptencoding, 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}

geturls_headers_firefox_windows7 = _make_geturls_headers(geturls_ua_firefox_windows7, "none")
geturls_headers_seamonkey_windows7 = _make_geturls_headers(geturls_ua_seamonkey_windows7, "none")
geturls_headers_chrome_windows7 = _make_geturls_headers(geturls_ua_chrome_windows7, "none")
geturls_headers_chromium_windows7 = _make_geturls_headers(geturls_ua_chromium_windows7, "none")
geturls_headers_midori_windows7 = _make_geturls_headers(geturls_ua_midori_windows7, "none")
geturls_headers_palemoon_windows7 = _make_geturls_headers(geturls_ua_palemoon_windows7, "gzip, deflate")
geturls_headers_opera_windows7 = _make_geturls_headers(geturls_ua_opera_windows7, "gzip, deflate")
geturls_headers_vivaldi_windows7 = _make_geturls_headers(geturls_ua_vivaldi_windows7, "gzip, deflate")
geturls_headers_internet_explorer_windows7 = _make_geturls_headers(geturls_ua_internet_explorer_windows7, "none")
geturls_headers_pywwwget_python = _make_geturls_headers(geturls_ua_pywwwget_python, "none")
geturls_headers_pywwwget_python_alt = _make_geturls_headers(geturls_ua_pywwwget_python_alt, "none")
geturls_headers_googlebot_google = _make_geturls_headers(geturls_ua_googlebot_google, "none")
geturls_headers_googlebot_google_old = _make_geturls_headers(geturls_ua_googlebot_google_old, "none")
geturls_headers = geturls_headers_firefox_windows7
# Seconds to pause before each request; 0 disables throttling.
geturls_download_sleep = 0
def add_url_param(url, **params):
    """Return *url* with *params* merged into its query string.

    Existing query keys are kept; keys given in params override duplicates.
    """
    n = 3  # index of the query component in urlsplit results
    parts = list(urlparse.urlsplit(url))
    # urlparse.parse_qsl exists on both Python 2 and 3; cgi.parse_qsl was
    # removed from the stdlib.  (Use parse_qs instead for list values.)
    d = dict(urlparse.parse_qsl(parts[n]))
    d.update(params)
    parts[n] = urlencode(d)
    return urlparse.urlunsplit(parts)
# Make executables next to this script and in the current working directory
# findable through os.environ["PATH"] lookups.
os.environ["PATH"] = os.pathsep.join([os.environ["PATH"], os.path.dirname(os.path.realpath(__file__)), os.getcwd()])
def which_exec(execfile):
    """Search os.environ["PATH"] for *execfile* and return its full path.

    Returns None when the executable is not found.  Uses os.pathsep and
    os.path.join instead of the original hard-coded ":" and "/", which
    broke PATH lookups on Windows.
    """
    for path in os.environ["PATH"].split(os.pathsep):
        candidate = os.path.join(path, execfile)
        if os.path.exists(candidate):
            return candidate
    return None
def listize(varlist):
    """Build 1-based lookup tables for *varlist*.

    Returns a dict holding the forward mapping (index -> value) under keys
    1/'reg' and the reverse mapping (value -> index) under keys 2/'rev'.
    Replaces the original manual while-loop counters with enumerate().
    """
    newlistreg = {}
    newlistrev = {}
    for ilx, value in enumerate(varlist, 1):
        newlistreg[ilx] = value
        newlistrev[value] = ilx
    return {1: newlistreg, 2: newlistrev, 'reg': newlistreg, 'rev': newlistrev}
def twolistize(varlist):
    """Build 1-based name and description lookup tables for *varlist*.

    Each item of varlist is a (name, desc) pair; both fields are stripped.
    Returns a dict with the name tables under keys 1/'name' and the
    description tables under keys 2/'desc', each in listize() layout.
    """
    newlistnamereg = {}
    newlistnamerev = {}
    newlistdescreg = {}
    newlistdescrev = {}
    for ilx, item in enumerate(varlist, 1):
        name = item[0].strip()
        desc = item[1].strip()
        newlistnamereg[ilx] = name
        newlistnamerev[name] = ilx
        newlistdescreg[ilx] = desc
        newlistdescrev[desc] = ilx
    newlistnametmp = {1: newlistnamereg, 2: newlistnamerev, 'reg': newlistnamereg, 'rev': newlistnamerev}
    newlistdesctmp = {1: newlistdescreg, 2: newlistdescrev, 'reg': newlistdescreg, 'rev': newlistdescrev}
    return {1: newlistnametmp, 2: newlistdesctmp, 'name': newlistnametmp, 'desc': newlistdesctmp}
def arglistize(proexec, *varlist):
    """Flatten (flag, value) pairs into an argv-style list headed by *proexec*.

    A None in either position of a pair is skipped.  Replaces the original
    manual while-loop indexing with a plain for-loop.
    """
    newarglist = [proexec]
    for argpair in varlist:
        if argpair[0] is not None:
            newarglist.append(argpair[0])
        if argpair[1] is not None:
            newarglist.append(argpair[1])
    return newarglist
# hms_string by ArcGIS Python Recipes
# https://arcpy.wordpress.com/2012/04/20/146/
def hms_string(sec_elapsed):
    """Format a duration in seconds as "H:MM:SS.ss"."""
    hours = int(sec_elapsed / (60 * 60))
    minutes = int((sec_elapsed % (60 * 60)) / 60)
    seconds = sec_elapsed % 60.0
    return "{}:{:>02}:{:>05.2f}".format(hours, minutes, seconds)
# get_readable_size by Lipis
# http://stackoverflow.com/posts/14998888/revisions
def get_readable_size(bytes, precision=1, unit="IEC"):
    """Convert a byte count into a human-readable size dict.

    unit is "IEC" (1024-based: KiB/MiB/...) or "SI" (1000-based: kB/MB/...);
    anything else falls back to IEC.  Returns a dict with the original count
    ('Bytes'), the formatted string ('ReadableWithSuffix') and its number
    ('ReadableWithoutSuffix') and suffix ('ReadableSuffix') parts.

    NOTE(review): partially reconstructed from an incomplete extraction; the
    cleanup regexes and dict layout follow the visible fragments.
    """
    if unit != "IEC" and unit != "SI":
        unit = "IEC"
    if unit == "IEC":
        units = [" B", " KiB", " MiB", " GiB", " TiB", " PiB", " EiB", " ZiB"]
        unitswos = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"]
        unitsize = 1024.0
    if unit == "SI":
        units = [" B", " kB", " MB", " GB", " TB", " PB", " EB", " ZB"]
        unitswos = ["B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB"]
        unitsize = 1000.0
    orgbytes = bytes
    for unitsuffix in units:
        if abs(bytes) < unitsize:
            strformat = "%3." + str(precision) + "f%s"
            pre_return_val = strformat % (bytes, unitsuffix)
            # Strip trailing zeros and a dangling decimal point,
            # e.g. "1.00 KiB" -> "1 KiB".
            pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
            pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
            alt_return_val = pre_return_val.split()
            return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
        bytes /= unitsize
    # Larger than the table covers: report in the largest supported unit.
    strformat = "%." + str(precision) + "f%s"
    pre_return_val = strformat % (bytes, "YiB")
    pre_return_val = re.sub(r"([0]+) ([A-Za-z]+)", r" \2", pre_return_val)
    pre_return_val = re.sub(r"\. ([A-Za-z]+)", r" \1", pre_return_val)
    alt_return_val = pre_return_val.split()
    return {'Bytes': orgbytes, 'ReadableWithSuffix': pre_return_val, 'ReadableWithoutSuffix': alt_return_val[0], 'ReadableSuffix': alt_return_val[1]}
def get_readable_size_from_file(infile, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() of *infile*'s size; optionally add digests.

    When usehashes is true, a hex digest of the file contents is added to the
    result for each algorithm named in the comma-separated *usehashtypes*,
    keyed by the upper-cased algorithm name.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = os.path.getsize(infile)
    return_val = get_readable_size(getfilesize, precision, unit)
    if usehashes:
        # Read once inside a with-block (the original opened the file without
        # a context manager) and hash the same bytes per algorithm.
        with open(infile, "rb") as openfile:
            filecontents = openfile.read()
        for hashtypename in usehashtypes.split(","):
            hashtypelistup = hashtypename.strip().upper()
            filehash = hashlib.new(hashtypelistup)
            filehash.update(filecontents)
            return_val.update({hashtypelistup: filehash.hexdigest()})
    return return_val
def get_readable_size_from_string(instring, precision=1, unit="IEC", usehashes=False, usehashtypes="md5,sha1"):
    """Return get_readable_size() of len(*instring*); optionally add digests.

    When usehashes is true, a hex digest of the string is added for each
    algorithm in the comma-separated *usehashtypes*, keyed by upper-cased
    algorithm name.  On Python 3 the string is UTF-8 encoded before hashing.
    """
    usehashtypes = usehashtypes.lower()
    getfilesize = len(instring)
    return_val = get_readable_size(getfilesize, precision, unit)
    if usehashes:
        # sys.version_info instead of the original sys.version[0] string
        # compare (which would misfire on a hypothetical Python 10).
        if sys.version_info[0] == 2:
            hashdata = instring
        else:
            hashdata = instring.encode('utf-8')
        for hashtypename in usehashtypes.split(","):
            hashtypelistup = hashtypename.strip().upper()
            filehash = hashlib.new(hashtypelistup)
            filehash.update(hashdata)
            return_val.update({hashtypelistup: filehash.hexdigest()})
    return return_val
def make_http_headers_from_dict_to_list(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of (name, value) tuples.

    This is the format urllib openers take via .addheaders.  A list passes
    through unchanged; any other type yields False.
    """
    if isinstance(headers, dict):
        # .items() works on both Python 2 and 3; the original's
        # iteritems()/items() version split is unnecessary.
        returnval = list(headers.items())
    elif isinstance(headers, list):
        returnval = headers
    else:
        returnval = False
    return returnval
def make_http_headers_from_dict_to_pycurl(headers={'Referer': "http://google.com/", 'User-Agent': geturls_ua, 'Accept-Encoding': "gzip, deflate", 'Accept-Language': "en-US,en;q=0.8,en-CA,en-GB;q=0.6", 'Accept-Charset': "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7", 'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", 'Connection': "close"}):
    """Convert a header dict to a list of "Name: value" strings.

    This is the format pycurl's HTTPHEADER option takes.  A list passes
    through unchanged; any other type yields False.
    """
    if isinstance(headers, dict):
        # .items() works on both Python 2 and 3; no version split needed.
        returnval = [headkey + ": " + headvalue for headkey, headvalue in headers.items()]
    elif isinstance(headers, list):
        returnval = headers
    else:
        returnval = False
    return returnval
def make_http_headers_from_list_to_dict(headers=[("Referer", "http://google.com/"), ("User-Agent", geturls_ua), ("Accept-Encoding", "gzip, deflate"), ("Accept-Language", "en-US,en;q=0.8,en-CA,en-GB;q=0.6"), ("Accept-Charset", "ISO-8859-1,ISO-8859-15,utf-8;q=0.7,*;q=0.7"), ("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"), ("Connection", "close")]):
    """Convert a list of (name, value) pairs into a header dict.

    A dict passes through unchanged; any other type yields False.  Replaces
    the original manual while-loop indexing with a generator expression.
    """
    if isinstance(headers, list):
        returnval = dict((hditem[0], hditem[1]) for hditem in headers)
    elif isinstance(headers, dict):
        returnval = headers
    else:
        returnval = False
    return returnval
def get_httplib_support(checkvalue=None):
    """List usable HTTP backends, or test one by name.

    With checkvalue None, returns the list of usable backend names ("urllib"
    always; "requests"/"mechanize" when importable).  With checkvalue given,
    returns True/False for that backend ("urllib1"/"urllib2" count as
    "urllib").
    """
    global haverequests, havemechanize
    returnval = ["urllib"]
    if haverequests:
        returnval.append("requests")
    if havemechanize:
        returnval.append("mechanize")
    # "is not None" instead of the original "not checkvalue==None".
    if checkvalue is not None:
        if checkvalue == "urllib1" or checkvalue == "urllib2":
            checkvalue = "urllib"
        returnval = checkvalue in returnval
    return returnval
def check_httplib_support(checkvalue="urllib"):
    """Return True/False for whether *checkvalue* names a usable backend.

    "urllib1" and "urllib2" are treated as aliases of "urllib".
    """
    if checkvalue == "urllib1" or checkvalue == "urllib2":
        checkvalue = "urllib"
    return get_httplib_support(checkvalue)
def get_httplib_support_list():
    """Return the list of usable HTTP backend names."""
    return get_httplib_support(None)
def download_from_url(httpurl, httpheaders, httpcookie, httplibuse="urllib", sleep=-1):
    """Download *httpurl* and return its content-info dict.

    httplibuse selects the backend ("urllib", "requests" or "mechanize");
    an unavailable backend silently falls back to urllib, and the aliases
    "urllib1"/"urllib2" map to "urllib".  sleep < 0 means use the module
    default geturls_download_sleep.  Returns False for an unknown backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    # Fall back to urllib when the requested backend failed to import.
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", buffersize=524288, sleep=-1):
    """Download *httpurl* to a temporary file via the selected backend.

    Same backend selection and fallback rules as download_from_url.
    buffersize is the read-chunk size in bytes; sleep < 0 means use the
    module default geturls_download_sleep.  Returns False for an unknown
    backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_to_file(httpurl, httpheaders, httpcookie, httplibuse="urllib", outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* to outpath/outfile via the selected backend.

    outfile "-" returns the content in memory instead of writing a file.
    buffersize is [download-chunk, copy-chunk].  Same backend selection and
    fallback rules as download_from_url; sleep < 0 means use the module
    default geturls_download_sleep.  Returns False for an unknown backend.
    """
    global geturls_download_sleep, haverequests, havemechanize
    if sleep < 0:
        sleep = geturls_download_sleep
    if httplibuse == "urllib1" or httplibuse == "urllib2":
        httplibuse = "urllib"
    if not haverequests and httplibuse == "requests":
        httplibuse = "urllib"
    if not havemechanize and httplibuse == "mechanize":
        httplibuse = "urllib"
    if httplibuse == "urllib":
        returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    elif httplibuse == "requests":
        returnval = download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    elif httplibuse == "mechanize":
        returnval = download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile, outpath, buffersize, sleep)
    else:
        returnval = False
    return returnval
def download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep=-1):
    """Fetch *httpurl* with urllib2/urllib.request.

    Returns a dict with the body ('Content'), response 'Headers', final
    'URL' and status 'Code'.  sleep < 0 means use the module default
    geturls_download_sleep.

    NOTE(review): partially reconstructed from an incomplete extraction;
    the pre-request time.sleep() call follows the original's pattern.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    geturls_text = geturls_opener.open(httpurl)
    log.info("Downloading URL "+httpurl)
    # Transparently decompress compressed responses.  NOTE(review): raw
    # "deflate" data is not in gzip format, so GzipFile would not actually
    # decode it -- original behavior kept, flagged for follow-up.
    if geturls_text.info().get("Content-Encoding") in ("gzip", "deflate"):
        if sys.version_info[0] == 2:
            strbuf = StringIO(geturls_text.read())
        else:
            strbuf = BytesIO(geturls_text.read())
        gzstrbuf = gzip.GzipFile(fileobj=strbuf)
        returnval_content = gzstrbuf.read()[:]
    else:
        returnval_content = geturls_text.read()[:]
    returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
    geturls_text.close()
    return returnval
def download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
    """Download *httpurl* into a uniquely-named temporary file using urllib.

    Returns a dict describing the file ('Filename', 'Filesize', ...), the
    response 'Headers'/'URL'/'Code' and the measured 'DownloadTime'.
    buffersize is the read-chunk size; sleep < 0 means use the module
    default geturls_download_sleep.

    NOTE(review): partially reconstructed from an incomplete extraction;
    the write/progress loop follows the visible fragments.
    """
    global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
    exec_time_start = time.time()
    # Derive a unique temp-file suffix from the URL, buffer size and time.
    myhash = hashlib.new("sha1")
    if sys.version_info[0] == 2:
        myhash.update(httpurl)
        myhash.update(str(buffersize))
        myhash.update(str(exec_time_start))
    else:
        myhash.update(httpurl.encode('utf-8'))
        myhash.update(str(buffersize).encode('utf-8'))
        myhash.update(str(exec_time_start).encode('utf-8'))
    newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
    if sleep < 0:
        sleep = geturls_download_sleep
    geturls_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(httpcookie))
    if isinstance(httpheaders, dict):
        httpheaders = make_http_headers_from_dict_to_list(httpheaders)
    geturls_opener.addheaders = httpheaders
    time.sleep(sleep)
    geturls_text = geturls_opener.open(httpurl)
    downloadsize = geturls_text.info().get('Content-Length')
    if downloadsize is not None:
        downloadsize = int(downloadsize)
    else:
        downloadsize = 0
    fulldatasize = 0
    prevdownsize = 0
    log.info("Downloading URL "+httpurl)
    with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
        tmpfilename = f.name
        returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.getcode()}
        while True:
            databytes = geturls_text.read(buffersize)
            if not databytes:
                break
            datasize = len(databytes)
            fulldatasize = datasize + fulldatasize
            percentage = ""
            if downloadsize > 0:
                percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
            downloaddiff = fulldatasize - prevdownsize
            log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
            prevdownsize = fulldatasize
            f.write(databytes)
    geturls_text.close()
    exec_time_end = time.time()
    # BUG FIX: durations were computed as start - end (always negative).
    log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
    returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
    return returnval
def download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
    """Download *httpurl* with urllib and deliver it to its destination.

    With outfile "-" the bytes are returned in memory ('Content'); otherwise
    the temporary download is moved to outpath/outfile and the result
    describes that file.  buffersize is [download-chunk, copy-chunk];
    sleep < 0 means use the module default.  Returns False when the
    destination path is unusable.

    NOTE(review): partially reconstructed from an incomplete extraction; the
    guard returns and copy loop follow the visible fragments.  The original's
    two identical outfile=="-" branches (StringIO on Python 2, BytesIO on
    Python 3) are merged into one.
    """
    global geturls_download_sleep
    if sleep < 0:
        sleep = geturls_download_sleep
    if not outfile == "-":
        outpath = outpath.rstrip(os.path.sep)
        filepath = os.path.realpath(outpath+os.path.sep+outfile)
        if not os.path.exists(outpath):
            os.makedirs(outpath)
        if os.path.exists(outpath) and os.path.isfile(outpath):
            return False
        if os.path.exists(filepath) and os.path.isdir(filepath):
            return False
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        log.info("Moving file "+tmpfilename+" to "+filepath)
        exec_time_start = time.time()
        shutil.move(tmpfilename, filepath)
        exec_time_end = time.time()
        # BUG FIX: durations were computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.")
        if os.path.exists(tmpfilename):
            os.remove(tmpfilename)
        returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    else:
        pretmpfilename = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize[0], sleep)
        tmpfilename = pretmpfilename['Filename']
        downloadsize = os.path.getsize(tmpfilename)
        fulldatasize = 0
        prevdownsize = 0
        exec_time_start = time.time()
        if sys.version_info[0] == 2:
            f = StringIO()
        else:
            f = BytesIO()
        with open(tmpfilename, 'rb') as ft:
            while True:
                databytes = ft.read(buffersize[1])
                if not databytes:
                    break
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        fdata = f.getvalue()
        f.close()
        os.remove(tmpfilename)
        exec_time_end = time.time()
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.")
        returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']}
    return returnval
if haverequests:
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fetch *httpurl* with requests; same result shape as the urllib
        variant ('Content', 'Headers', 'URL', 'Code')."""
        global geturls_download_sleep
        if sleep < 0:
            sleep = geturls_download_sleep
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        time.sleep(sleep)
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie)
        log.info("Downloading URL "+httpurl)
        # NOTE(review): the original checks Content-Type here (the urllib
        # variant checks Content-Encoding) -- kept as-is, but this looks
        # like it should match the urllib variant; confirm upstream.
        if geturls_text.headers.get('Content-Type') in ("gzip", "deflate"):
            if sys.version_info[0] == 2:
                strbuf = StringIO(geturls_text.content)
            else:
                strbuf = BytesIO(geturls_text.content)
            gzstrbuf = gzip.GzipFile(fileobj=strbuf)
            # BUG FIX: GzipFile has no .content attribute; read() the
            # decompressed bytes instead.
            returnval_content = gzstrbuf.read()[:]
        else:
            returnval_content = geturls_text.content[:]
        returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
        geturls_text.close()
        return returnval
if not haverequests:
    def download_from_url_with_requests(httpurl, httpheaders, httpcookie, sleep=-1):
        """Fallback when requests is unavailable: delegate to urllib."""
        return download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep)
if haverequests:
    def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
        """Stream *httpurl* into a uniquely-named temporary file using
        requests; same result shape as the urllib variant.

        NOTE(review): partially reconstructed from an incomplete extraction.
        """
        global geturls_download_sleep, tmpfileprefix, tmpfilesuffix
        exec_time_start = time.time()
        # Derive a unique temp-file suffix from the URL, buffer size and time.
        myhash = hashlib.new("sha1")
        if sys.version_info[0] == 2:
            myhash.update(httpurl)
            myhash.update(str(buffersize))
            myhash.update(str(exec_time_start))
        else:
            myhash.update(httpurl.encode('utf-8'))
            myhash.update(str(buffersize).encode('utf-8'))
            myhash.update(str(exec_time_start).encode('utf-8'))
        newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest())
        if sleep < 0:
            sleep = geturls_download_sleep
        if isinstance(httpheaders, list):
            httpheaders = make_http_headers_from_list_to_dict(httpheaders)
        time.sleep(sleep)
        geturls_text = requests.get(httpurl, headers=httpheaders, cookies=httpcookie, stream=True)
        # BUG FIX: the original called int() on the raw header value before
        # its None check, so a missing Content-Length raised TypeError.
        downloadsize = geturls_text.headers.get('Content-Length')
        if downloadsize is not None:
            downloadsize = int(downloadsize)
        else:
            downloadsize = 0
        fulldatasize = 0
        prevdownsize = 0
        log.info("Downloading URL "+httpurl)
        with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
            tmpfilename = f.name
            returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.headers), 'URL': geturls_text.url, 'Code': geturls_text.status_code}
            for databytes in geturls_text.iter_content(chunk_size=buffersize):
                datasize = len(databytes)
                fulldatasize = datasize + fulldatasize
                percentage = ""
                if downloadsize > 0:
                    percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%"
                downloaddiff = fulldatasize - prevdownsize
                log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix'])
                prevdownsize = fulldatasize
                f.write(databytes)
        geturls_text.close()
        exec_time_end = time.time()
        # BUG FIX: durations were computed as start - end (always negative).
        log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.")
        returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)})
        return returnval
if(haverequests==False):
 def download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
  """Fallback used when the requests module is unavailable: delegate to the
  urllib implementation, which has the same interface and return value."""
  returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
  # FIX: propagate the delegate's result; the visible body only assigned it.
  return returnval;
if(haverequests==True):
 def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Download httpurl with requests. With a real outfile name the download
  is moved into outpath and a file-info dict is returned; with outfile="-"
  the whole body is returned in-memory in a content-info dict. Returns
  False when the destination path is unusable.
  buffersize is a two-element list: [download chunk, copy chunk]."""
  global geturls_download_sleep;
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(not outfile=="-"):
   outpath = outpath.rstrip(os.path.sep);
   filepath = os.path.realpath(outpath+os.path.sep+outfile);
   if(not os.path.exists(outpath)):
    os.makedirs(outpath);
   # NOTE(review): an unusable destination (outpath is a file, or filepath
   # is a directory) is treated as failure — confirm False is the intended
   # sentinel used by callers.
   if(os.path.exists(outpath) and os.path.isfile(outpath)):
    return False;
   if(os.path.exists(filepath) and os.path.isdir(filepath)):
    return False;
   pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   log.info("Moving file "+tmpfilename+" to "+filepath);
   exec_time_start = time.time();
   shutil.move(tmpfilename, filepath);
   exec_time_end = time.time();
   # FIX: elapsed time is end - start (was start - end -> negative).
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
   if(os.path.exists(tmpfilename)==True):
    os.remove(tmpfilename);
   returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  if(outfile=="-"):
   # The py2 and py3 branches were identical apart from the buffer class;
   # merged here with a single version switch.
   pretmpfilename = download_from_url_file_with_requests(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   # FIX: copy-progress counters must start at zero.
   fulldatasize = 0;
   prevdownsize = 0;
   exec_time_start = time.time();
   if(sys.version[0]=="2"):
    f = StringIO();
   if(sys.version[0]>="3"):
    f = BytesIO();
   with open(tmpfilename, 'rb') as ft:
    while True:
     databytes = ft.read(buffersize[1]);
     if not databytes: break;
     datasize = len(databytes);
     fulldatasize = datasize + fulldatasize;
     # FIX: guard the percentage math against a zero file size.
     if(downloadsize>0):
      percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
      downloaddiff = fulldatasize - prevdownsize;
      log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
     prevdownsize = fulldatasize;
     f.write(databytes);
   fdata = f.getvalue();
   f.close();
   os.remove(tmpfilename);
   exec_time_end = time.time();
   # FIX: elapsed time is end - start (was start - end -> negative).
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
   returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  # FIX: the assembled result was never returned in the visible body.
  return returnval;
if(haverequests==False):
 def download_from_url_to_file_with_requests(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Fallback used when the requests module is unavailable: delegate to the
  urllib implementation."""
  # FIX: the original delegated positionally as (..., buffersize, outfile,
  # outpath, sleep), which does not match this wrapper's own parameter
  # order; keyword arguments make the call order-independent.
  # NOTE(review): assumes the urllib implementation uses the same parameter
  # names — confirm against its definition.
  returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
  # FIX: propagate the delegate's result to the caller.
  return returnval;
if(havemechanize==True):
 def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
  """Fetch httpurl with a mechanize.Browser and return a dict of the form
  {'Type': "Content", 'Content': ..., 'Headers': ..., 'URL': ..., 'Code': ...},
  transparently decompressing gzip-encoded responses."""
  global geturls_download_sleep;
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  geturls_opener = mechanize.Browser();
  # mechanize expects headers as a list of (name, value) pairs.
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  geturls_opener.addheaders = httpheaders;
  geturls_opener.set_cookiejar(httpcookie);
  geturls_opener.set_handle_robots(False);
  geturls_text = geturls_opener.open(httpurl);
  log.info("Downloading URL "+httpurl);
  # Read the Content-Encoding header once instead of four times.
  httpencoding = geturls_text.info().get("Content-Encoding");
  if(httpencoding=="gzip" or httpencoding=="deflate"):
   # NOTE(review): a "deflate" body is not gzip-framed, so GzipFile will
   # most likely fail on it (zlib would be needed) — confirm with a real
   # deflate-encoded response before relying on this path.
   if(sys.version[0]=="2"):
    strbuf = StringIO(geturls_text.read());
   if(sys.version[0]>="3"):
    strbuf = BytesIO(geturls_text.read());
   gzstrbuf = gzip.GzipFile(fileobj=strbuf);
   returnval_content = gzstrbuf.read()[:];
  else:
   returnval_content = geturls_text.read()[:];
  returnval = {'Type': "Content", 'Content': returnval_content, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code};
  geturls_text.close();
  # FIX: the assembled result was never returned in the visible body.
  return returnval;
if(havemechanize==False):
 def download_from_url_with_mechanize(httpurl, httpheaders, httpcookie, sleep=-1):
  """Fallback used when the mechanize module is unavailable: delegate to the
  urllib implementation, which has the same interface and return value."""
  returnval = download_from_url_with_urllib(httpurl, httpheaders, httpcookie, sleep);
  # FIX: propagate the delegate's result; the visible body only assigned it.
  return returnval;
if(havemechanize==True):
 def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
  """Download httpurl with mechanize, streaming the body into a named
  temporary file (kept on disk: delete=False). Returns a dict with
  'Type', 'Filename', 'Filesize', 'FilesizeAlt', 'Headers', 'URL', 'Code'
  plus 'DownloadTime'/'DownloadTimeReadable'."""
  global geturls_download_sleep, tmpfileprefix, tmpfilesuffix;
  exec_time_start = time.time();
  # Unique temp-file suffix derived from the URL, buffer size and start time.
  myhash = hashlib.new("sha1");
  if(sys.version[0]=="2"):
   myhash.update(httpurl);
   myhash.update(str(buffersize));
   myhash.update(str(exec_time_start));
  if(sys.version[0]>="3"):
   # Python 3 hashes require bytes, not str.
   myhash.update(httpurl.encode('utf-8'));
   myhash.update(str(buffersize).encode('utf-8'));
   myhash.update(str(exec_time_start).encode('utf-8'));
  newtmpfilesuffix = tmpfilesuffix + str(myhash.hexdigest());
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  geturls_opener = mechanize.Browser();
  # mechanize expects headers as a list of (name, value) pairs.
  if(isinstance(httpheaders, dict)):
   httpheaders = make_http_headers_from_dict_to_list(httpheaders);
  geturls_opener.addheaders = httpheaders;
  geturls_opener.set_cookiejar(httpcookie);
  geturls_opener.set_handle_robots(False);
  geturls_text = geturls_opener.open(httpurl);
  # FIX: test for a missing Content-Length header *before* int(); the
  # original order int(info().get(...)) raises TypeError on None.
  downloadsize = geturls_text.info().get('Content-Length');
  if(downloadsize is not None):
   downloadsize = int(downloadsize);
  if downloadsize is None: downloadsize = 0;
  # FIX: progress counters must start at zero before the read loop.
  fulldatasize = 0;
  prevdownsize = 0;
  log.info("Downloading URL "+httpurl);
  with tempfile.NamedTemporaryFile('wb+', prefix=tmpfileprefix, suffix=newtmpfilesuffix, delete=False) as f:
   tmpfilename = f.name;
   returnval = {'Type': "File", 'Filename': tmpfilename, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'Headers': dict(geturls_text.info()), 'URL': geturls_text.geturl(), 'Code': geturls_text.code};
   while True:
    databytes = geturls_text.read(buffersize);
    if not databytes: break;
    datasize = len(databytes);
    fulldatasize = datasize + fulldatasize;
    # FIX: only report percentages when the server sent a usable
    # Content-Length; otherwise the division raises ZeroDivisionError.
    if(downloadsize>0):
     percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
     downloaddiff = fulldatasize - prevdownsize;
     log.info("Downloading "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Downloaded "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
    prevdownsize = fulldatasize;
    # FIX: each chunk must actually be written to the temp file.
    f.write(databytes);
  geturls_text.close();
  exec_time_end = time.time();
  # FIX: elapsed time is end - start (was start - end -> negative).
  log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to download file.");
  returnval.update({'Filesize': os.path.getsize(tmpfilename), 'DownloadTime': float(exec_time_end - exec_time_start), 'DownloadTimeReadable': hms_string(exec_time_end - exec_time_start)});
  return returnval;
if(havemechanize==False):
 def download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize=524288, sleep=-1):
  """Fallback used when the mechanize module is unavailable: delegate to the
  urllib implementation, which has the same interface and return value."""
  returnval = download_from_url_file_with_urllib(httpurl, httpheaders, httpcookie, buffersize, sleep);
  # FIX: propagate the delegate's result; the visible body only assigned it.
  return returnval;
if(havemechanize==True):
 def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Download httpurl with mechanize. With a real outfile name the download
  is moved into outpath and a file-info dict is returned; with outfile="-"
  the whole body is returned in-memory in a content-info dict. Returns
  False when the destination path is unusable.
  buffersize is a two-element list: [download chunk, copy chunk]."""
  global geturls_download_sleep;
  # A negative sleep means "use the module-wide default delay".
  if(sleep<0):
   sleep = geturls_download_sleep;
  if(not outfile=="-"):
   outpath = outpath.rstrip(os.path.sep);
   filepath = os.path.realpath(outpath+os.path.sep+outfile);
   if(not os.path.exists(outpath)):
    os.makedirs(outpath);
   # NOTE(review): an unusable destination (outpath is a file, or filepath
   # is a directory) is treated as failure — confirm False is the intended
   # sentinel used by callers.
   if(os.path.exists(outpath) and os.path.isfile(outpath)):
    return False;
   if(os.path.exists(filepath) and os.path.isdir(filepath)):
    return False;
   pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   log.info("Moving file "+tmpfilename+" to "+filepath);
   exec_time_start = time.time();
   shutil.move(tmpfilename, filepath);
   exec_time_end = time.time();
   # FIX: the "to move file" log originally ran BEFORE shutil.move and
   # before exec_time_end existed (NameError); it now runs after the move,
   # and the duration is end - start instead of the negative start - end.
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to move file.");
   if(os.path.exists(tmpfilename)==True):
    os.remove(tmpfilename);
   returnval = {'Type': "File", 'Filename': filepath, 'Filesize': downloadsize, 'FilesizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  if(outfile=="-"):
   # The py2 and py3 branches were identical apart from the buffer class;
   # merged here with a single version switch.
   pretmpfilename = download_from_url_file_with_mechanize(httpurl, httpheaders, httpcookie, buffersize[0], sleep);
   tmpfilename = pretmpfilename['Filename'];
   downloadsize = os.path.getsize(tmpfilename);
   # FIX: copy-progress counters must start at zero.
   fulldatasize = 0;
   prevdownsize = 0;
   exec_time_start = time.time();
   if(sys.version[0]=="2"):
    f = StringIO();
   if(sys.version[0]>="3"):
    f = BytesIO();
   with open(tmpfilename, 'rb') as ft:
    while True:
     databytes = ft.read(buffersize[1]);
     if not databytes: break;
     datasize = len(databytes);
     fulldatasize = datasize + fulldatasize;
     # FIX: guard the percentage math against a zero file size.
     if(downloadsize>0):
      percentage = str("{0:.2f}".format(float(float(fulldatasize / downloadsize) * 100))).rstrip('0').rstrip('.')+"%";
      downloaddiff = fulldatasize - prevdownsize;
      log.info("Copying "+get_readable_size(fulldatasize, 2, "SI")['ReadableWithSuffix']+" / "+get_readable_size(downloadsize, 2, "SI")['ReadableWithSuffix']+" "+str(percentage)+" / Copied "+get_readable_size(downloaddiff, 2, "IEC")['ReadableWithSuffix']);
     prevdownsize = fulldatasize;
     f.write(databytes);
   fdata = f.getvalue();
   f.close();
   os.remove(tmpfilename);
   exec_time_end = time.time();
   # FIX: elapsed time is end - start (was start - end -> negative).
   log.info("It took "+hms_string(exec_time_end - exec_time_start)+" to copy file.");
   returnval = {'Type': "Content", 'Content': fdata, 'Contentsize': downloadsize, 'ContentsizeAlt': {'IEC': get_readable_size(downloadsize, 2, "IEC"), 'SI': get_readable_size(downloadsize, 2, "SI")}, 'DownloadTime': pretmpfilename['DownloadTime'], 'DownloadTimeReadable': pretmpfilename['DownloadTimeReadable'], 'MoveFileTime': float(exec_time_end - exec_time_start), 'MoveFileTimeReadable': hms_string(exec_time_end - exec_time_start), 'Headers': pretmpfilename['Headers'], 'URL': pretmpfilename['URL'], 'Code': pretmpfilename['Code']};
  # FIX: the assembled result was never returned in the visible body.
  return returnval;
if(havemechanize==False):
 def download_from_url_to_file_with_mechanize(httpurl, httpheaders, httpcookie, outfile="-", outpath=os.getcwd(), buffersize=[524288, 524288], sleep=-1):
  """Fallback used when the mechanize module is unavailable: delegate to the
  urllib implementation."""
  # FIX: the original delegated positionally as (..., buffersize, outfile,
  # outpath, sleep), which does not match this wrapper's own parameter
  # order; keyword arguments make the call order-independent.
  # NOTE(review): assumes the urllib implementation uses the same parameter
  # names — confirm against its definition.
  returnval = download_from_url_to_file_with_urllib(httpurl, httpheaders, httpcookie, outfile=outfile, outpath=outpath, buffersize=buffersize, sleep=sleep);
  # FIX: propagate the delegate's result to the caller.
  return returnval;