from classutil import call_subprocess
# METHODS FOR AUTOMATIC DOWNLOADING OF RESOURCES
def copy_to_file(f, ifile=None, newpath=None, blocksize=8192000):
    """Copy everything from file object f to ifile (or to a new file newpath).

    f: source file-like object, read in binary chunks.
    ifile: destination file-like object; ignored when newpath is given.
    newpath: if not None, this path is created in 'wb' mode, used as the
        destination, and closed again before returning.
    blocksize: maximum number of bytes read (and held in memory) at a time.

    A destination opened here is always closed; a caller-supplied ifile is
    left open so that several sources can be appended to it.  The source f
    is closed once copying ends, because the callers in this module hand
    over freshly opened objects and never close them themselves.

    Raises ValueError if neither ifile nor newpath is supplied.
    """
    if newpath is not None:
        ifile = open(newpath, 'wb')  # binary file
    elif ifile is None:
        raise ValueError('copy_to_file requires either ifile or newpath')
    try:
        while True:
            block = f.read(blocksize)
            if not block:  # '' (Python 2) or b'' both mean end of input
                break
            ifile.write(block)
    finally:
        if newpath is not None:  # only close the file we opened ourselves
            ifile.close()
        f.close()
def do_gunzip(filepath, newpath=None):
    """Gunzip filepath using the Python gzip module; return the output path.

    filepath: path of the gzip-compressed input file.
    newpath: where to write the uncompressed data.  Defaults to filepath
        with its last three characters (the '.gz' suffix) removed.
    """
    from gzip import GzipFile
    if newpath is None:
        newpath = filepath[:-3]  # drop the '.gz' suffix
    f = GzipFile(filepath)
    try:
        copy_to_file(f, newpath=newpath)
    finally:
        f.close()  # harmless if the stream has already been closed
    return newpath
def run_gunzip(filepath, newpath=None):
    """Run the external gunzip program as a subprocess; return the output path.

    filepath: path of the gzip-compressed input file.
    newpath: where to write the uncompressed data.  Defaults to filepath
        with its last three characters (the '.gz' suffix) removed.

    Raises OSError if call_subprocess() reports a non-zero (true) status.
    """
    if newpath is None:
        newpath = filepath[:-3]  # drop the '.gz' suffix
    ifile = open(newpath, 'w+b')
    try:
        # 'gunzip -c' writes to stdout, which is redirected into newpath
        if call_subprocess(['gunzip', '-c', filepath], stdout=ifile):
            raise OSError('gunzip "%s" failed!' % filepath)
    finally:
        ifile.close()  # release the handle on success and on failure
    return newpath
def run_unzip(filepath, newpath=None, singleFile=False, **kwargs):
    """Run the external unzip program as a subprocess; return newpath.

    filepath: path of the zip archive.
    newpath: output path.  Defaults to filepath with its last four
        characters (the '.zip' suffix) removed.
    singleFile: if true, 'unzip -p' streams every member to stdout and the
        concatenated bytes are saved in the single file newpath.  Otherwise
        plain 'unzip filepath' is run, with no target directory argument.
    kwargs: accepted and ignored, so callers can pass one option set to
        every uncompress helper.

    Raises OSError if call_subprocess() reports a non-zero (true) status.
    """
    if newpath is None:
        newpath = filepath[:-4]  # DROP THE .zip SUFFIX
    if singleFile:  # concatenate all files into newpath
        ifile = open(newpath, 'wb')  # copy as binary file
        try:
            status = call_subprocess(['unzip', '-p', filepath], stdout=ifile)
        finally:
            ifile.close()
    else:  # just unzip the package as usual
        status = call_subprocess(['unzip', filepath])
    # NOTE(review): failure is tested by truthiness, mirroring run_gunzip
    if status:
        raise OSError('unzip "%s" failed!' % filepath)
    return newpath
def create_dir_if_needed(path):
    """Ensure that directory path exists, creating it and its parents if needed.

    path: directory to create.  An empty path (for example the dirname of a
        bare relative file name) means "current directory" and is a no-op;
        without this guard the recursion below would never terminate.

    Raises OSError if the directory cannot be created.
    """
    import os
    if not path or os.path.isdir(path):
        return  # nothing to create
    create_dir_if_needed(os.path.dirname(path))  # ensure parent exists
    try:
        os.mkdir(path)  # create this directory
    except OSError:
        # The directory may have appeared in the meantime (trailing separator
        # handled by the recursive call, or another process created it);
        # only a directory that is still missing is a real error.
        if not os.path.isdir(path):
            raise
def create_file_with_path(basepath, filepath):
    """Open basepath/filepath for binary writing, creating parent directories.

    basepath: directory the file is placed under.
    filepath: file name, possibly with sub-directories, relative to basepath.

    Returns the open file object (mode 'wb'); the caller must close it.
    """
    import os
    newpath = os.path.join(basepath, filepath)
    create_dir_if_needed(os.path.dirname(newpath))
    return open(newpath, 'wb')  # copy as binary file
def do_unzip(filepath, newpath=None, singleFile=False, **kwargs):
    """Extract a zip archive with the zipfile module; return newpath.

    filepath: path of the zip archive.
    newpath: output path.  Defaults to filepath with its last four
        characters (the '.zip' suffix) removed.
    singleFile: if true, the contents of all members are concatenated, in
        archive order, into the single file newpath.  Otherwise every member
        is written to its own file below the directory newpath.
    kwargs: accepted and ignored, so callers can pass one option set to
        every uncompress helper.
    """
    # WARNING: zipfile module reads entire file into memory!
    from zipfile import ZipFile
    if newpath is None:
        newpath = filepath[:-4]  # drop the '.zip' suffix
    t = ZipFile(filepath, 'r')
    try:
        if singleFile:  # extract to a single file
            ifile = open(newpath, 'wb')  # copy as binary file
            try:
                for name in t.namelist():
                    ifile.write(t.read(name))  # may run out of memory!!
            finally:
                ifile.close()
        else:  # extract a bunch of files as usual
            for name in t.namelist():
                ifile = create_file_with_path(newpath, name)
                try:
                    ifile.write(t.read(name))  # may run out of memory!!
                finally:
                    ifile.close()
    finally:
        t.close()
    return newpath
def do_untar(filepath, mode='r|', newpath=None, singleFile=False, **kwargs):
    """Extract a tar archive with the tarfile module; return newpath.

    filepath: path of the tar archive.
    mode: mode string handed to tarfile.open(); the default 'r|' reads the
        archive as a forward-only stream.
    newpath: output path.  Defaults to filepath + '.out'.
    singleFile: if true, the contents of all regular-file members are
        concatenated, in archive order, into the single file newpath.
        Otherwise the whole archive is extracted into the directory that
        contains newpath.
    kwargs: accepted and ignored, so callers can pass one option set to
        every uncompress helper.
    """
    import tarfile
    if newpath is None:
        newpath = filepath + '.out'
    t = tarfile.open(filepath, mode)
    try:
        if singleFile:  # extract to a single file
            ifile = open(newpath, 'wb')  # copy as binary file
            try:
                # Walk the members in order and read each one immediately:
                # a stream-mode archive cannot seek back, so listing all
                # names first and extracting afterwards would fail.
                for member in t:
                    f = t.extractfile(member)
                    if f is None:  # directory or other non-file member
                        continue
                    copy_to_file(f, ifile)
            finally:
                ifile.close()
        else:  # extract a bunch of files as usual
            import os
            t.extractall(os.path.dirname(newpath))
    finally:
        t.close()
    return newpath
def uncompress_file(filepath, **kwargs):
    """Apply the appropriate uncompression based on the file suffix.

    Handles .zip .tar .tgz .tar.gz .tar.bz2 and .gz for now.  The longer
    '.tar.gz' suffix is tested before the plain '.gz' suffix so that tarballs
    are not merely gunzipped.

    filepath: path of the downloaded file.
    kwargs: options forwarded unchanged to the chosen helper.

    Returns the path produced by the helper, or filepath itself when the
    suffix is not recognized (file assumed not to be compressed).
    """
    if filepath.endswith('.zip'):
        logger.info('unzipping %s...' % filepath)
        # NOTE(review): fallback structure mirrors the '.gz' branch below
        try:
            return run_unzip(filepath, **kwargs)  # run as sub process
        except OSError:  # no usable unzip program: use the zipfile module
            return do_unzip(filepath, **kwargs)
    elif filepath.endswith('.tar'):
        logger.info('untarring %s...' % filepath)
        return do_untar(filepath, newpath=filepath[:-4], **kwargs)
    elif filepath.endswith('.tgz'):
        logger.info('untarring %s...' % filepath)
        return do_untar(filepath, mode='r:gz', newpath=filepath[:-4],
                        **kwargs)
    elif filepath.endswith('.tar.gz'):
        logger.info('untarring %s...' % filepath)
        return do_untar(filepath, mode='r:gz', newpath=filepath[:-7],
                        **kwargs)
    elif filepath.endswith('.tar.bz2'):
        logger.info('untarring %s...' % filepath)
        return do_untar(filepath, mode='r:bz2', newpath=filepath[:-8],
                        **kwargs)
    elif filepath.endswith('.gz'):
        logger.info('gunzipping %s...' % filepath)
        try:  # could use gzip module, but it's two times slower!!
            return run_gunzip(filepath, **kwargs)  # run as sub process
        except OSError:  # on Windows, have to run as python module
            return do_gunzip(filepath, **kwargs)
    return filepath  # DEFAULT: NOT COMPRESSED, SO JUST HAND BACK FILENAME
def download_monitor(bcount, bsize, totalsize):
    """Show current download progress (a urlretrieve-style report hook).

    bcount: number of blocks transferred so far.
    bsize: size of one block in bytes.
    totalsize: total size of the download in bytes; a value <= 0 means the
        size is unknown, in which case nothing is reported.

    A message is logged each time progress has advanced by at least ten
    percentage points since the last message.  The last reported value is
    kept in the function attribute percentage_last_shown.
    """
    # NOTE(review): resetting only on the first block is inferred; resetting
    # on every call would make the ten-point threshold meaningless.
    if bcount == 0:
        download_monitor.percentage_last_shown = 0.
    if totalsize <= 0:
        return  # unknown size: a percentage cannot be computed
    nbytes = bcount * bsize
    percentage = nbytes * 100. / totalsize
    last_shown = getattr(download_monitor, 'percentage_last_shown', 0.)
    if percentage >= 10. + last_shown:
        logger.info('downloaded %s bytes (%2.1f%%)...'
                    % (nbytes, percentage))
        download_monitor.percentage_last_shown = percentage
def download_unpickler(path, filename, kwargs):
    """Download the desired file, and uncompress it if need be.

    path: URL to download.
    filename: local file name to save as; if None, the basename of path.
    kwargs: dict of options forwarded to uncompress_file().

    The file is stored in the directory named by the WORLDBASEDOWNLOAD
    environment variable, or else in the directory returned by
    classutil.get_env_or_cwd('PYGRDATADOWNLOAD').

    Returns a classutil.SourceFileName for the final (uncompressed) path,
    flagged with _saveLocalBuild = True.
    """
    import os
    import urllib
    import classutil
    # NOTE(review): the two conditions below are inferred; the visible code
    # assigns filename and dl_dir twice with no branch between them.
    if filename is None:
        filename = os.path.basename(path)
    try:
        dl_dir = os.environ['WORLDBASEDOWNLOAD']
    except KeyError:
        dl_dir = classutil.get_env_or_cwd('PYGRDATADOWNLOAD')
    filepath = os.path.join(dl_dir, filename)
    logger.info('Beginning download of %s to %s...' % (path, filepath))
    urllib.urlretrieve(path, filepath, download_monitor)
    logger.info('Download done.')
    filepath = uncompress_file(filepath, **kwargs)  # UNCOMPRESS IF NEEDED
    # PATH TO WHERE THIS FILE IS NOW STORED
    o = classutil.SourceFileName(filepath)
    o._saveLocalBuild = True  # MARK THIS FOR SAVING IN LOCAL PYGR.DATA
    return o
download_unpickler.__safe_for_unpickling__ = 1
class SourceURL(object):
    '''unpickling this object will trigger downloading of the desired path,
    which will be cached to WORLDBASEDOWNLOAD directory if any.
    The value returned from unpickling will simply be the path to the
    downloaded file, as a SourceFileName'''
    _worldbase_no_cache = True  # force worldbase to always re-load this class

    def __init__(self, path, filename=None, **kwargs):
        """Record the download parameters and check that the URL is readable.

        path: URL of the resource.
        filename: local file name to save as (None lets the downloader
            choose one).
        kwargs: options handed to the downloader on unpickling.

        For 'http:' URLs a GET request is issued immediately; OSError is
        raised if the server does not answer with status 200.
        """
        self.path = path
        self.kwargs = kwargs
        self.filename = filename
        if path.startswith('http:'):  # make sure we can read this URL
            import httplib
            # 'http://host/a/b'.split('/') -> ['http:', '', 'host', 'a', 'b']
            conn = httplib.HTTPConnection(path.split('/')[2])
            try:
                conn.request('GET', '/'.join([''] + path.split('/')[3:]))
                r1 = conn.getresponse()
                # NOTE(review): the exact status test is inferred; only the
                # raise statement is visible in the source.
                if r1.status != 200:
                    raise OSError('http GET failed: %d %s, %s'
                                  % (r1.status, r1.reason, path))
            finally:
                conn.close()

    def __reduce__(self):
        """Pickle as a call to download_unpickler(path, filename, kwargs)."""
        return (download_unpickler, (self.path, self.filename, self.kwargs))
def generic_build_unpickler(cname, args, kwargs):
    """Construct the class registered under cname with the given arguments.

    cname: name of the class to build; only registered names are accepted,
        so that unpickling cannot instantiate arbitrary classes.
    args: positional arguments for the constructor.
    kwargs: keyword arguments for the constructor.

    Returns the new object, flagged with _saveLocalBuild = True.
    Raises ValueError if cname is not registered.
    """
    # NOTE(review): the registration test is inferred from the one visible
    # import; confirm 'BlastDB' is the only registered name.
    if cname == 'BlastDB':
        from seqdb import BlastDB as klass
    else:
        raise ValueError('''class name not registered for unpickling security.
Add it to pygr.downloader.generic_build_unpickler if needed: ''' + cname)
    o = klass(*args, **kwargs)
    o._saveLocalBuild = True  # MARK FOR LOCAL PYGR.DATA SAVE
    return o
generic_build_unpickler.__safe_for_unpickling__ = 1
class GenericBuilder(object):
    'proxy for constructing the desired klass on unpickling'
    _worldbase_no_cache = True  # force worldbase to always re-load this class

    def __init__(self, cname, *args, **kwargs):
        """Remember the class name and constructor arguments to pickle.

        cname: name of the class to construct on unpickling.
        args, kwargs: arguments to pass to that class's constructor.
        """
        self.cname = cname
        self.args = args
        self.kwargs = kwargs

    def __reduce__(self):
        """Pickle as a call to generic_build_unpickler(cname, args, kwargs)."""
        return (generic_build_unpickler,
                (self.cname, self.args, self.kwargs))