added SQLTable pickle test
[pygr.git] / pygr / downloader.py
blob88692b56aca5ebda4a5a9365cc3aae6a1ad3f1a5
1 import sys, os
2 from classutil import call_subprocess
3 import logger
5 # METHODS FOR AUTOMATIC DOWNLOADING OF RESOURCES
def copy_to_file(f, ifile=None, newpath=None, blocksize=8192000):
    '''copy from file obj f to ifile (or create newpath if given)

    f: readable file-like object; it is always closed before returning.
    ifile: writable file-like object to copy into; it is left open.
    newpath: if given, a new binary file is created at this path, used
      as the destination instead of ifile, and closed when copying ends.
    blocksize: maximum number of bytes requested from f per read.'''
    if newpath is not None:
        ifile = open(newpath, 'wb') # binary file
    try:
        while True:
            s = f.read(blocksize)
            if not s: # end of data: '' for text streams, b'' for binary
                break
            ifile.write(s)
    finally:
        if newpath is not None: # only close what we opened ourselves
            ifile.close()
        f.close()
def do_gunzip(filepath, newpath=None):
    '''decompress a gzip file in-process using the Python gzip module

    filepath: path of the gzip-compressed input file.
    newpath: output path; defaults to filepath minus its last three
      characters (i.e. the .gz suffix).
    Returns the path of the decompressed file.'''
    import gzip
    target = newpath
    if target is None:
        target = filepath[:-3]
    # copy_to_file creates target and closes both ends of the copy
    copy_to_file(gzip.GzipFile(filepath), newpath=target)
    return target
def run_gunzip(filepath, newpath=None):
    '''decompress filepath by running "gunzip -c" as a sub process

    The program's standard output is redirected into newpath, which
    defaults to filepath minus its last three characters (the .gz
    suffix). Raises OSError if call_subprocess reports a true (non-zero)
    status. Returns newpath.'''
    if newpath is None:
        newpath = filepath[:-3]
    command = ['gunzip', '-c', filepath]
    outfile = open(newpath, 'w+b')
    try:
        status = call_subprocess(command, stdout=outfile)
        if status:
            raise OSError('gunzip "%s" failed!' % filepath)
    finally:
        outfile.close()
    return newpath
def run_unzip(filepath, newpath=None, singleFile=False, **kwargs):
    '''run the unzip program as a sub process

    filepath: path of the zip archive.
    newpath: value returned to the caller; defaults to filepath minus
      its last four characters (the .zip suffix).
    singleFile: if true, "unzip -p" output (all members concatenated)
      is written to the single file newpath.
    Extra keyword arguments are accepted and ignored.
    Raises OSError when unzip exits with a non-zero status.

    NOTE(review): without singleFile the command is a plain
    "unzip filepath" with no target directory option, so newpath is only
    returned, not passed to unzip -- confirm callers expect that.'''
    if newpath is None:
        newpath = filepath[:-4] # DROP THE .zip SUFFIX
    if not singleFile: # just unzip the package as usual
        status = call_subprocess(['unzip', filepath])
    else: # concatenate all files into newpath
        outfile = open(newpath, 'wb') # copy as binary file
        try:
            status = call_subprocess(['unzip', '-p', filepath],
                                     stdout=outfile)
        finally:
            outfile.close()
    if status != 0:
        raise OSError('unzip "%s" failed!' % filepath)
    return newpath
def create_dir_if_needed(path):
    '''ensure that this directory exists, by creating it if needed

    Missing parent directories are created as well. An empty path (for
    example the dirname of a bare file name) is treated as "nothing to
    create" instead of recursing forever on dirname('') == ''.
    Raises OSError if the directory does not exist afterwards.'''
    import os
    if not path or os.path.isdir(path):
        return # nothing to create
    try:
        os.makedirs(path) # creates intermediate directories too
    except OSError:
        # tolerate another process creating it between check and mkdir
        if not os.path.isdir(path):
            raise
def create_file_with_path(basepath, filepath):
    '''open basepath/filepath for binary writing, creating its parent
    directory (and any missing ancestors) first

    Returns the open file object; the caller is responsible for closing
    it.

    NOTE(review): filepath is joined onto basepath without validation,
    so an absolute filepath or one containing ".." can resolve outside
    basepath -- confirm callers only pass trusted names.'''
    import os.path
    target = os.path.join(basepath, filepath)
    parent = os.path.dirname(target)
    create_dir_if_needed(parent)
    return open(target, 'wb') # copy as binary file
def do_unzip(filepath, newpath=None, singleFile=False, **kwargs):
    '''extract zip archive, to single file given by newpath if desired

    filepath: path of the zip archive.
    newpath: output location; defaults to filepath minus its last four
      characters (the .zip suffix). With singleFile it is the output
      file, otherwise the directory the members are written under.
    singleFile: if true, the contents of all members are concatenated
      into the single file newpath.
    Extra keyword arguments are accepted and ignored.
    Returns newpath.

    NOTE(review): member names come from the archive and are joined onto
    newpath without validation; an archive from an untrusted source
    could use absolute or ".." names to write outside newpath.'''
    # WARNING: zipfile module reads entire file into memory!
    if newpath is None:
        newpath = filepath[:-4] # DROP THE .zip SUFFIX
    import os
    from zipfile import ZipFile
    t = ZipFile(filepath, 'r')
    try:
        if singleFile: # extract to a single file
            ifile = open(newpath, 'wb') # copy as binary file
            try:
                for name in t.namelist():
                    ifile.write(t.read(name)) # may run out of memory!!
            finally:
                ifile.close()
        else: # extract a bunch of files as usual
            for name in t.namelist():
                if name.endswith('/'):
                    # directory entry: it has no data and cannot be
                    # opened as a file, so just make the directory
                    create_dir_if_needed(os.path.join(newpath,
                                                      name.rstrip('/')))
                    continue
                ifile = create_file_with_path(newpath, name)
                try:
                    ifile.write(t.read(name)) # may run out of memory!!
                finally:
                    ifile.close() # do not leak the handle if write fails
    finally:
        t.close()
    return newpath
def do_untar(filepath, mode='r|', newpath=None, singleFile=False, **kwargs):
    '''extract tar archive, to single file given by newpath if desired

    filepath: path of the tar archive.
    mode: mode string handed to tarfile.open (default 'r|').
    newpath: output location; defaults to filepath + '.out'. With
      singleFile it is the output file; otherwise the archive is
      extracted into the directory part of newpath.
    singleFile: if true, the contents of all members that have data are
      concatenated, in archive order, into the single file newpath.
    Extra keyword arguments are accepted and ignored.
    Returns newpath.

    NOTE(review): extractall() writes member paths as stored in the
    archive; confirm archives come from a trusted source.'''
    if newpath is None:
        newpath = filepath + '.out'
    import tarfile
    t = tarfile.open(filepath, mode)
    try:
        if singleFile: # extract to a single file
            import shutil
            ifile = open(newpath, 'wb') # copy as binary file
            try:
                # Visit members in archive order and read each one as it
                # is reached: listing all names first consumes a '|'
                # stream, which cannot be rewound to read the data.
                for member in t:
                    f = t.extractfile(member)
                    if f is None: # directory etc.: no data to copy
                        continue
                    try:
                        shutil.copyfileobj(f, ifile)
                    finally:
                        f.close()
            finally:
                ifile.close()
        else: # extract a bunch of files as usual
            import os
            t.extractall(os.path.dirname(newpath))
    finally:
        t.close()
    return newpath
def uncompress_file(filepath, **kwargs):
    '''stub for applying appropriate uncompression based on file suffix
    (.tar .tar.gz .tgz .tar.bz2 .gz and .zip for now)

    filepath: path of the possibly compressed / archived file.
    kwargs: options for the extraction helpers (e.g. newpath,
      singleFile). The gunzip helpers only take newpath, so only that
      option is forwarded to them.
    Returns the path produced by the helper that was used, or filepath
    itself when the suffix is not recognised.'''
    if filepath.endswith('.zip'):
        logger.info('unzipping %s...' % filepath)
        try:
            return run_unzip(filepath, **kwargs)
        except OSError: # external program failed: use the zipfile module
            return do_unzip(filepath, **kwargs)
    # (suffix, tarfile mode); None leaves do_untar's default mode alone.
    # These must be tested before '.gz' so '.tar.gz' is handled as a tar.
    tar_formats = (('.tar', None), ('.tgz', 'r:gz'),
                   ('.tar.gz', 'r:gz'), ('.tar.bz2', 'r:bz2'))
    for suffix, mode in tar_formats:
        if filepath.endswith(suffix):
            logger.info('untarring %s...' % filepath)
            # setdefault: a caller-supplied newpath / mode is used as is
            # instead of colliding with a second keyword of that name
            kwargs.setdefault('newpath', filepath[:-len(suffix)])
            if mode is not None:
                kwargs.setdefault('mode', mode)
            return do_untar(filepath, **kwargs)
    if filepath.endswith('.gz'):
        logger.info('gunzipping %s...' % filepath)
        newpath = kwargs.get('newpath') # the only option gunzip accepts
        try: # could use gzip module, but it's two times slower!!
            return run_gunzip(filepath, newpath) # run as sub process
        except OSError: # on Windows, have to run as python module
            return do_gunzip(filepath, newpath)
    return filepath # DEFAULT: NOT COMPRESSED, SO JUST HAND BACK FILENAME
def download_monitor(bcount, bsize, totalsize):
    '''show current download progress

    Has the (block count, block size, total size) signature of a
    urlretrieve report hook. A message is logged each time progress has
    advanced at least 10 percentage points since the last message; the
    last reported value is kept in
    download_monitor.percentage_last_shown.'''
    if bcount == 0: # first call of a download: restart the bookkeeping
        download_monitor.percentage_last_shown = 0.
    if totalsize <= 0:
        # size unknown (urlretrieve may report -1) or empty file: there
        # is no meaningful percentage, and 0 would divide by zero
        return
    # the last block is usually only partly filled, so cap at the total
    received = min(bcount * bsize, totalsize)
    percentage = received * 100. / totalsize
    # tolerate a first call that does not start at block 0
    last_shown = getattr(download_monitor, 'percentage_last_shown', 0.)
    if percentage >= 10. + last_shown:
        logger.info('downloaded %s bytes (%2.1f%%)...'
                    % (received, percentage))
        download_monitor.percentage_last_shown = percentage
def download_unpickler(path, filename, kwargs):
    '''download path to the local download directory, uncompress it if
    need be, and return where it now lives as a SourceFileName

    path: URL handed to urllib.urlretrieve.
    filename: local file name to save as; defaults to the basename of
      path.
    kwargs: dict of keyword arguments forwarded to uncompress_file.'''
    import urllib, classutil, os
    if filename is None:
        filename = os.path.basename(path)
    # WORLDBASEDOWNLOAD wins when set; otherwise classutil chooses
    # (presumably from PYGRDATADOWNLOAD or the current directory)
    download_dir = os.environ.get('WORLDBASEDOWNLOAD')
    if download_dir is None:
        download_dir = classutil.get_env_or_cwd('PYGRDATADOWNLOAD')
    target = os.path.join(download_dir, filename)
    logger.info('Beginning download of %s to %s...' % (path, target))
    urllib.urlretrieve(path, target, download_monitor)
    logger.info('Download done.')
    # UNCOMPRESS IF NEEDED; result is the path where the data now lives
    local_path = uncompress_file(target, **kwargs)
    source = classutil.SourceFileName(local_path)
    source._saveLocalBuild = True # MARK THIS FOR SAVING IN LOCAL PYGR.DATA
    return source
download_unpickler.__safe_for_unpickling__ = 1
class SourceURL(object):
    '''picklable reference to a downloadable resource

    Unpickling this object triggers downloading of the desired path,
    which will be cached to the WORLDBASEDOWNLOAD directory if any.
    The value returned from unpickling is not a SourceURL but simply the
    path to the downloaded file, as a SourceFileName.'''
    _worldbase_no_cache = True # force worldbase to always re-load this class

    def __init__(self, path, filename=None, **kwargs):
        '''save the URL, the optional local filename and the download
        options; for an http: URL also issue a GET and raise OSError
        unless the server answers with status 200'''
        self.path = path
        self.kwargs = kwargs
        self.filename = filename
        if not path.startswith('http:'):
            return # other URL schemes are not probed here
        import httplib
        # 'http://host/a/b' splits into ['http:', '', 'host', 'a', 'b']
        pieces = path.split('/')
        conn = httplib.HTTPConnection(pieces[2])
        try: # make sure we can read this URL
            resource = '/'.join([''] + pieces[3:])
            conn.request('GET', resource)
            response = conn.getresponse()
            if response.status != 200:
                raise OSError('http GET failed: %d %s, %s'
                              % (response.status, response.reason, path))
        finally:
            conn.close()

    def __reduce__(self):
        'pickle as a call to download_unpickler with the saved settings'
        saved = (self.path, self.filename, self.kwargs)
        return (download_unpickler, saved)
def generic_build_unpickler(cname, args, kwargs):
    '''construct the class registered under cname with the given args

    cname: name of the class to build; only 'BlastDB' is registered.
    args, kwargs: positional / keyword arguments for its constructor.
    Raises ValueError for any unregistered class name.
    Returns the new object, marked with _saveLocalBuild = True.'''
    if cname != 'BlastDB': # refuse anything not explicitly registered
        raise ValueError('''class name not registered for unpickling security.
Add it to pygr.downloader.generic_build_unpickler if needed: ''' + cname)
    from seqdb import BlastDB
    built = BlastDB(*args, **kwargs)
    built._saveLocalBuild = True # MARK FOR LOCAL PYGR.DATA SAVE
    return built
generic_build_unpickler.__safe_for_unpickling__ = 1
class GenericBuilder(object):
    '''picklable stand-in that records a class name plus constructor
    arguments; it reduces to a generic_build_unpickler call, so the
    target object is constructed when the pickle is loaded'''
    _worldbase_no_cache = True # force worldbase to always re-load this class

    def __init__(self, cname, *args, **kwargs):
        'remember the class name and the arguments to build it with'
        self.cname, self.args, self.kwargs = cname, args, kwargs

    def __reduce__(self):
        'pickle as a call to generic_build_unpickler with the saved state'
        build_args = (self.cname, self.args, self.kwargs)
        return (generic_build_unpickler, build_args)