pygr/downloader.py

   1 import sys, os
   2 from classutil import call_subprocess
   3 import logger
   4
   5 # METHODS FOR AUTOMATIC DOWNLOADING OF RESOURCES
   6
   7 def copy_to_file(f,ifile=None,newpath=None,blocksize=8192000):
   8     'copy from file obj f to ifile (or create newpath if given)'
   9     if newpath is not None:
  10         ifile = file(newpath,'wb') # binary file
  11     try:
  12         while True:
  13             s = f.read(blocksize)
  14             if s == '': break
  15             ifile.write(s)
  16     finally:
  17         if newpath is not None:
  18             ifile.close()
  19         f.close()
  20
  21 def do_gunzip(filepath,newpath=None):
  22     'gunzip the target using Python gzip module'
  23     from gzip import GzipFile
  24     if newpath is None:
  25         newpath = filepath[:-3]
  26     f = GzipFile(filepath)
  27     copy_to_file(f,newpath=newpath)
  28     return newpath
  29
  30 def run_gunzip(filepath,newpath=None):
  31     'run gunzip program as a sub process'
  32     if newpath is None:
  33         newpath = filepath[:-3]
  34     ifile = open(newpath, 'w+b')
  35     try:
  36         if call_subprocess(['gunzip', '-c', filepath], stdout=ifile):
  37             raise OSError('gunzip "%s" failed!' % filepath)
  38     finally:
  39         ifile.close()
  40     return newpath
  41
  42
  43 def run_unzip(filepath,newpath=None,singleFile=False,**kwargs):
  44     '''run unzip program as a sub process,
  45     save to single file newpath if desired.'''
  46     if newpath is None:
  47         newpath = filepath[:-4] # DROP THE .zip SUFFIX
  48     if singleFile: # concatenate all files into newpath
  49         ifile = file(newpath, 'wb') # copy as binary file
  50         try:
  51             status = call_subprocess(['unzip', '-p', filepath], stdout=ifile)
  52         finally:
  53             ifile.close()
  54     else: # just unzip the package as usual
  55         status = call_subprocess(['unzip', filepath])
  56     if status != 0:
  57         raise OSError('unzip "%s" failed!' % filepath)
  58     return newpath
  59
  60
  61 def create_dir_if_needed(path):
  62     'ensure that this directory exists, by creating it if needed'
  63     import os
  64     if os.path.isdir(path):
  65         return # directory exists so nothing to do
  66     create_dir_if_needed(os.path.dirname(path)) # ensure parent exists
  67     os.mkdir(path) # create this directory
  68
  69 def create_file_with_path(basepath, filepath):
  70     'create file in write mode, creating parent directory(s) if needed'
  71     import os.path
  72     newpath = os.path.join(basepath, filepath)
  73     create_dir_if_needed(os.path.dirname(newpath))
  74     return file(newpath, 'wb') # copy as binary file
  75
  76
  77 def do_unzip(filepath, newpath=None,singleFile=False,**kwargs):
  78     'extract zip archive, to single file given by newpath if desired'
  79     # WARNING: zipfile module reads entire file into memory!
  80     if newpath is None:
  81         newpath = filepath[:-4]
  82     from zipfile import ZipFile
  83     t = ZipFile(filepath, 'r')
  84     try:
  85         if singleFile: # extract to a single file
  86             ifile = file(newpath,'wb') # copy as binary file
  87             try:
  88                 for name in t.namelist():
  89                     ifile.write(t.read(name)) # may run out of memory!!
  90             finally:
  91                 ifile.close()
  92         else: # extract a bunch of files as usual
  93             for name in t.namelist():
  94                 ifile = create_file_with_path(newpath, name)
  95                 ifile.write(t.read(name)) # may run out of memory!!
  96                 ifile.close()
  97     finally:
  98         t.close()
  99     return newpath
 100
 101
 102 def do_untar(filepath,mode='r|',newpath=None,singleFile=False,**kwargs):
 103     'extract tar archive, to single file given by newpath if desired'
 104     if newpath is None:
 105         newpath = filepath + '.out'
 106     import tarfile
 107     t = tarfile.open(filepath, mode)
 108     try:
 109         if singleFile: # extract to a single file
 110             ifile = file(newpath,'wb') # copy as binary file
 111             try:
 112                 for name in t.getnames():
 113                     f = t.extractfile(name)
 114                     copy_to_file(f,ifile)
 115             finally:
 116                 ifile.close()
 117         else: # extract a bunch of files as usual
 118             import os
 119             t.extractall(os.path.dirname(newpath))
 120     finally:
 121         t.close()
 122     return newpath
 123
 124
 125 def uncompress_file(filepath,**kwargs):
 126     '''stub for applying appropriate uncompression based on file suffix
 127     (.tar .tar.gz .tgz .tar.bz2 .gz and .zip for now)'''
 128     if filepath.endswith('.zip'):
 129         logger.info('unzipping %s...' % filepath)
 130         try:
 131             return run_unzip(filepath,**kwargs)
 132         except OSError:
 133             return do_unzip(filepath, **kwargs)
 134     elif filepath.endswith('.tar'):
 135         logger.info('untarring %s...' % filepath)
 136         return do_untar(filepath,newpath=filepath[:-4],**kwargs)
 137     elif filepath.endswith('.tgz'):
 138         logger.info('untarring %s...' % filepath)
 139         return do_untar(filepath,mode='r:gz',newpath=filepath[:-4],**kwargs)
 140     elif filepath.endswith('.tar.gz'):
 141         logger.info('untarring %s...' % filepath)
 142         return do_untar(filepath,mode='r:gz',newpath=filepath[:-7],**kwargs)
 143     elif filepath.endswith('.tar.bz2'):
 144         logger.info('untarring %s...' % filepath)
 145         return do_untar(filepath,mode='r:bz2',newpath=filepath[:-8],**kwargs)
 146     elif filepath.endswith('.gz'):
 147         logger.info('gunzipping %s...' % filepath)
 148         try:  # could use gzip module, but it's two times slower!!
 149             return run_gunzip(filepath, **kwargs) # run as sub process
 150         except OSError: # on Windows, have to run as python module
 151             return do_gunzip(filepath, **kwargs)
 152
 153     return filepath # DEFAULT: NOT COMPRESSED, SO JUST HAND BACK FILENAME
 154
 155 def download_monitor(bcount, bsize, totalsize):
 156     'show current download progress'
 157     if bcount == 0:
 158         download_monitor.percentage_last_shown = 0.
 159     bytes = bcount * bsize
 160     percentage = bytes * 100. / totalsize
 161     if percentage >= 10. + download_monitor.percentage_last_shown:
 162         logger.info('downloaded %s bytes (%2.1f%%)...'
 163                     % (bytes, percentage))
 164         download_monitor.percentage_last_shown = percentage
 165
 166 def download_unpickler(path,filename,kwargs):
 167     'try to download the desired file, and uncompress it if need be'
 168     import urllib,classutil,os
 169     if filename is None:
 170         filename = os.path.basename(path)
 171     try:
 172         dl_dir = os.environ['WORLDBASEDOWNLOAD']
 173     except KeyError:
 174         dl_dir = classutil.get_env_or_cwd('PYGRDATADOWNLOAD')
 175     filepath = os.path.join(dl_dir, filename)
 176     logger.info('Beginning download of %s to %s...' % (path, filepath))
 177     t = urllib.urlretrieve(path,filepath,download_monitor)
 178     logger.info('Download done.')
 179     filepath = uncompress_file(filepath, **kwargs) # UNCOMPRESS IF NEEDED
 180     # PATH TO WHERE THIS FILE IS NOW STORED
 181     o = classutil.SourceFileName(filepath)
 182     o._saveLocalBuild = True # MARK THIS FOR SAVING IN LOCAL PYGR.DATA
 183     return o
 184 download_unpickler.__safe_for_unpickling__ = 1
 185
 186 class SourceURL(object):
 187     '''unpickling this object will trigger downloading of the desired path,
 188     which will be cached to WORLDBASEDOWNLOAD directory if any.
 189     The value returned from unpickling will simply be the path to the
 190     downloaded file, as a SourceFileName'''
 191     _worldbase_no_cache = True # force worldbase to always re-load this class
 192     def __init__(self,path,filename=None,**kwargs):
 193         self.path = path
 194         self.kwargs = kwargs
 195         self.filename = filename
 196         if path.startswith('http:'): # make sure we can read this URL
 197             import httplib
 198             conn = httplib.HTTPConnection(path.split('/')[2])
 199             try:
 200                 conn.request('GET', '/'.join([''] + path.split('/')[3:]))
 201                 r1 = conn.getresponse()
 202                 if r1.status!=200:
 203                     raise OSError('http GET failed: %d %s, %s'
 204                                   %(r1.status,r1.reason,path))
 205             finally:
 206                 conn.close()
 207     def __reduce__(self):
 208         return (download_unpickler,(self.path,self.filename,self.kwargs))
 209
 210 def generic_build_unpickler(cname, args, kwargs):
 211     'does nothing but construct the specified klass with the specified args'
 212     if cname=='BlastDB':
 213         from seqdb import BlastDB as klass
 214     else:
 215         raise ValueError('''class name not registered for unpickling security.
 216 Add it to pygr.downloader.generic_build_unpickler if needed: ''' + cname)
 217     o = klass(*args, **kwargs)
 218     o._saveLocalBuild = True # MARK FOR LOCAL PYGR.DATA SAVE
 219     return o
 220 generic_build_unpickler.__safe_for_unpickling__ = 1
 221
 222 class GenericBuilder(object):
 223     'proxy for constructing the desired klass on unpickling'
 224     _worldbase_no_cache = True # force worldbase to always re-load this class
 225     def __init__(self, cname, *args, **kwargs):
 226         self.cname = cname
 227         self.args = args
 228         self.kwargs = kwargs
 229     def __reduce__(self):
 230         return (generic_build_unpickler,(self.cname,self.args,self.kwargs))