1 from __future__
import generators
def getAnnotationAttr(self, attr):
    """Look up `attr` on the slice record that backs this annotation.

    Designed to be installed as ``__getattr__`` on annotation classes: the
    record stored under ``self.id`` in ``self.db.sliceDB`` is fetched and
    ``self.db.getSliceAttr`` resolves the requested attribute on it.
    """
    annotation_db = self.db
    slice_record = annotation_db.sliceDB[self.id]
    return annotation_db.getSliceAttr(slice_record, attr)
def annotation_repr(self):
    """Return a short text label for an annotation or annotation slice.

    The label has the form ``<title><id>[<start>:<stop>]`` where ``title`` is
    the object's ``annotationType``, or ``'annot'`` when that is None.
    Objects whose ``orientation`` is not positive get a leading ``-`` and
    their coordinates are negated and swapped so the printed interval reads
    in ascending order.
    """
    title = self.annotationType
    if title is None:
        title = 'annot'  # fallback so the format below always has a title
    if self.orientation > 0:
        return '%s%s[%d:%d]' % (title, self.id, self.start, self.stop)
    return '-%s%s[%d:%d]' % (title, self.id, -self.stop, -self.start)
class AnnotationSeqDescr(object):
    """Descriptor giving the parent-sequence interval of an annotation."""

    def __get__(self, obj, objtype):
        # The annotation stores its parent sequence and its start on that
        # sequence; its own `stop` is added to that start to get the end.
        parent = obj._anno_seq
        begin = obj._anno_start
        end = begin + obj.stop
        return absoluteSlice(parent, begin, end)
class AnnotationSliceDescr(object):
    """Descriptor giving the sequence interval under an annotation slice."""

    def __get__(self, obj, objtype):
        # Apply the slice's own start/stop to `obj.pathForward.sequence`.
        whole_interval = obj.pathForward.sequence
        return relativeSlice(whole_interval, obj.start, obj.stop)
class AnnotationSeqtypeDescr(object):
    """Descriptor reporting the seqtype of the annotated parent sequence."""

    def __get__(self, obj, objtype):
        # Delegate to the parent sequence stored on the annotation.
        parent = obj._anno_seq
        return parent.seqtype()
class AnnotationSeq(SeqPath):
    """Base class representing an annotation.

    An annotation has its own coordinate system that runs from 0 to the
    annotated length (``stop - start`` of the constructor arguments).  The
    interval it covers on the parent sequence is exposed through the
    ``sequence`` descriptor.  Attributes not found on the object are
    forwarded to its slice record via ``getAnnotationAttr``.
    """
    # Annotation-local coordinates: must be real attributes, otherwise the
    # __getattr__ forwarding below would return the slice record's values.
    # NOTE(review): defaults inferred from `self.stop = stop - start` -- confirm.
    start = 0
    step = 1
    orientation = 1

    def __init__(self, id, db, parent, start, stop):
        """Create an annotation of ``parent[start:stop]``.

        id     -- key of this annotation in ``db``
        db     -- annotation database; must provide ``sliceDB`` and
                  ``getSliceAttr`` (both are used by attribute forwarding)
        parent -- the sequence object being annotated
        start, stop -- interval on ``parent`` covered by the annotation
        """
        # id/db must be set first: __getattr__ reads both, and would recurse
        # forever if either were missing from the instance.
        self.id = id
        self.db = db
        self.stop = stop - start  # length, in annotation-local coordinates
        self._anno_seq = parent
        self._anno_start = start
        self.path = self  # NOTE(review): presumably required by SeqPath -- confirm

    __getattr__ = getAnnotationAttr
    sequence = AnnotationSeqDescr()
    annotationType = classutil.DBAttributeDescr('annotationType')
    _seqtype = AnnotationSeqtypeDescr()
    __repr__ = annotation_repr

    def __cmp__(self, other):
        """Order annotations that cover the same interval in the same db.

        Returns -1 for objects that are not annotations, the comparison of
        the (start, stop) pairs when both annotations have equal sequences,
        the same id and the same db, and NOT_ON_SAME_PATH otherwise.
        """
        if not isinstance(other, AnnotationSeq):
            return -1
        if cmp(self.sequence, other.sequence) == 0:
            if self.id == other.id and self.db is other.db:
                return cmp((self.start, self.stop), (other.start, other.stop))
        return NOT_ON_SAME_PATH

    def strslice(self, start, stop):
        """Always raise ValueError: an annotation has no sequence string."""
        raise ValueError('''this is an annotation, and you cannot get a sequence string from it.
Use its sequence attribute to get a sequence object representing this interval.''')
class AnnotationSlice(SeqDBSlice):
    """A sub-interval of an annotation."""
    # Shared with the full annotation class: text label and forwarding of
    # unknown attributes to the slice record.
    __repr__ = annotation_repr
    __getattr__ = getAnnotationAttr
    annotationType = classutil.DBAttributeDescr('annotationType')
    # Sequence interval underneath this slice.
    sequence = AnnotationSliceDescr()
class TranslationAnnotSeqDescr(object):
    """Descriptor giving the parent-sequence interval of a translation."""

    def __get__(self, obj, objtype):
        # Both endpoints on the parent sequence are stored on the object, so
        # they are passed through unchanged.
        parent = obj._anno_seq
        begin = obj._anno_start
        end = obj._anno_stop
        return absoluteSlice(parent, begin, end)
class TranslationAnnotFrameDescr(object):
    """Get the frame of this protein translation, relative to original DNA."""

    def __get__(self, obj, objtype):
        """Return the reading frame of ``obj.pathForward.sequence``.

        For a positive-orientation interval the result is
        ``start % 3 + 1`` (1, 2 or 3); otherwise it is
        ``-((start + 1) % 3 + 1)`` (-1, -2 or -3).
        """
        orig = obj.pathForward.sequence
        if orig.orientation > 0:
            return (orig.start % 3) + 1
        return -((orig.start + 1) % 3 + 1)
class TranslationAnnot(AnnotationSeq):
    """Annotation representing aa translation of a given nucleotide interval.

    Coordinates of the annotation itself are in amino acids, while the
    ``sequence`` attribute is the underlying nucleotide interval.
    """

    def __init__(self, id, db, parent, start, stop):
        """Create a translation annotation of ``parent[start:stop]``.

        Arguments are as for ``AnnotationSeq.__init__``; ``start`` and
        ``stop`` are nucleotide coordinates on ``parent``.
        """
        AnnotationSeq.__init__(self, id, db, parent, start, stop)
        # The base class stores the nucleotide length; convert it to a codon
        # count, since slices of this annotation are scaled back by 3.
        # NOTE(review): conversion inferred from TranslationAnnotSliceDescr -- confirm.
        self.stop //= 3
        self._anno_stop = stop  # nucleotide end on the parent sequence

    sequence = TranslationAnnotSeqDescr()
    frame = TranslationAnnotFrameDescr()
    _seqtype = PROTEIN_SEQTYPE

    def strslice(self, start, stop):
        """Get the aa translation of our associated ORF, sliced [start:stop].

        The full translation is computed once with ``translate_orf`` and
        kept on the instance as ``_translation`` for later calls.
        """
        try:
            aa = self._translation
        except AttributeError:  # not translated yet
            aa = self._translation = translate_orf(str(self.sequence))
        return aa[start:stop]
class TranslationAnnotSliceDescr(object):
    """Descriptor giving the sequence interval under a translation slice."""

    def __get__(self, obj, objtype):
        # Slice coordinates are multiplied by 3 before being applied to
        # `obj.pathForward.sequence`.
        whole_interval = obj.pathForward.sequence
        scaled_start = 3 * obj.start
        scaled_stop = 3 * obj.stop
        return relativeSlice(whole_interval, scaled_start, scaled_stop)
class TranslationAnnotSlice(AnnotationSlice):
    # Slice of a translation annotation: same as AnnotationSlice except for
    # the two descriptors below.
    frame = TranslationAnnotFrameDescr()
    sequence = TranslationAnnotSliceDescr()
class AnnotationDB(object, UserDict.DictMixin):
    """Container of annotations as specific slices of db sequences.

    Maps annotation IDs to annotation objects that are built on demand from
    the slice records in ``sliceDB`` and the sequences in ``seqDB``.  Built
    objects are kept in ``self._weakValueDict``, which acts as a cache.
    Item assignment is not supported; use ``new_annotation`` to add records.
    """

    def __init__(self, sliceDB, seqDB, annotationType=None,
                 itemClass=AnnotationSeq,
                 itemSliceClass=AnnotationSlice,
                 itemAttrDict=None,  # GET RID OF THIS BACKWARDS-COMPATIBILITY KLUGE!!
                 sliceAttrDict=None, maxCache=None, autoGC=True,
                 checkFirstID=True, **kwargs):
        '''sliceDB must map identifier to a sliceInfo object;
        sliceInfo must have attributes: id, start, stop, orientation;
        seqDB must map sequence ID to a sliceable sequence object;
        sliceAttrDict gives optional dict of item attributes that
        should be mapped to sliceDB item attributes.
        maxCache specifies the maximum number of annotation objects to keep in the cache.
        If sliceDB is None, one is opened via classutil.get_shelve_or_dict(**kwargs).
        If checkFirstID is true, the first record (if any) is used to check
        that an annotation object can be built; KeyError is raised if not.'''
        if autoGC:  # automatically garbage collect unused objects
            self._weakValueDict = classutil.RecentValueDictionary(autoGC)
        else:
            self._weakValueDict = {}  # object cache
        self.autoGC = autoGC
        if sliceAttrDict is None:
            sliceAttrDict = {}
        if sliceDB is not None:
            self.sliceDB = sliceDB
        else:  # NEED TO CREATE / OPEN A DATABASE FOR THE USER
            self.sliceDB = classutil.get_shelve_or_dict(**kwargs)
        self.seqDB = seqDB
        self.annotationType = annotationType
        self.itemClass = itemClass
        self.itemSliceClass = itemSliceClass
        self.sliceAttrDict = sliceAttrDict  # USER-PROVIDED ALIASES
        if maxCache is not None:
            self.maxCache = maxCache
        if checkFirstID:
            try:  # don't cache anything now; schema may change itemClass!
                k = iter(self).next()  # get the first ID if any
                self.get_annot_obj(k, self.sliceDB[k])  # valid annotation?
            except KeyError:  # a convenient warning to the user...
                raise KeyError('''\
cannot create annotation object %s; sequence database %s may not be correct'''
                               % (k, repr(seqDB),))
            except StopIteration:
                pass  # dataset is empty so there is nothing we can check...

    ############### PICKLING METHODS
    __getstate__ = classutil.standard_getstate
    __setstate__ = classutil.standard_setstate
    _pickleAttrs = dict(sliceDB=0, seqDB=0, annotationType=0, autoGC=0,
                        itemClass=0, itemSliceClass=0, sliceAttrDict=0,
                        maxCache=0)

    def __hash__(self):  # @CTB unnecessary??
        'ALLOW THIS OBJECT TO BE USED AS A KEY IN DICTS...'
        return id(self)

    def __getitem__(self, k):
        'get annotation object by its ID'
        try:  # GET FROM OUR CACHE
            return self._weakValueDict[k]
        except KeyError:
            pass
        return self.sliceAnnotation(k, self.sliceDB[k])

    def __setitem__(self, k, v):
        'always raises KeyError: direct assignment is not supported'
        raise KeyError('''you cannot save annotations directly using annoDB[k] = v
Instead, use annoDB.new_annotation(k,sliceInfo) where sliceInfo provides
a sequence ID, start, stop (and any additional info desired), and will be
saved directly to the sliceDB.''')

    def getSliceAttr(self, sliceInfo, attr):
        '''get attribute attr from sliceInfo, honouring self.sliceAttrDict:
        an alias may be another attribute name, or an int index when
        sliceInfo is a tuple.'''
        try:
            k = self.sliceAttrDict[attr]  # USE ALIAS IF PROVIDED
        except KeyError:
            return getattr(sliceInfo, attr)  # GET ATTRIBUTE AS USUAL
        try:  # REMAP TO ANOTHER ATTRIBUTE NAME
            return getattr(sliceInfo, k)
        except TypeError:  # TREAT AS int INDEX INTO A TUPLE
            return sliceInfo[k]

    def get_annot_obj(self, k, sliceInfo):
        '''create an annotation object based on the input sliceInfo.
        Raises IndexError if the interval has zero or negative length.'''
        start = int(self.getSliceAttr(sliceInfo, 'start'))
        stop = int(self.getSliceAttr(sliceInfo, 'stop'))

        try:
            orientation = self.getSliceAttr(sliceInfo, 'orientation')
            orientation = int(orientation)
            if orientation < 0 and start >= 0:
                start, stop = (-stop, -start)  # NEGATIVE ORIENTATION COORDINATES
        except (AttributeError, IndexError):
            pass  # ok if no orientation is specified.

        if start >= stop:
            raise IndexError('annotation %s has zero or negative length [%s:%s]!'
                             % (k, start, stop))
        seq_id = self.getSliceAttr(sliceInfo, 'id')
        seq = self.seqDB[seq_id]
        return self.itemClass(k, self, seq, start, stop)

    def sliceAnnotation(self, k, sliceInfo, limitCache=True):
        'create annotation and cache it'
        a = self.get_annot_obj(k, sliceInfo)
        try:  # APPLY CACHE SIZE LIMIT IF ANY
            if limitCache and self.maxCache < len(self._weakValueDict):
                self._weakValueDict.clear()
        except AttributeError:
            pass  # no maxCache attribute means no limit was requested
        self._weakValueDict[k] = a  # CACHE THIS IN OUR DICT
        return a

    def new_annotation(self, k, sliceInfo):
        'save sliceInfo to the annotation database and return annotation object'
        a = self.sliceAnnotation(k, sliceInfo)  # 1st CHECK IT GIVES A VALID ANNOTATION
        try:
            self.sliceDB[k] = sliceInfo  # NOW SAVE IT TO THE SLICE DATABASE
        except:  # roll back the cache entry for ANY failure, then re-raise
            try:
                del self._weakValueDict[k]  # DELETE FROM CACHE
            except KeyError:
                pass
            raise
        self._wroteSliceDB = True  # close() must flush sliceDB
        return a

    def foreignKey(self, attr, k):
        'iterate over items matching specified foreign key'
        for t in self.sliceDB.foreignKey(attr, k):
            try:  # get from cache if exists
                yield self._weakValueDict[t.id]
            except KeyError:
                yield self.sliceAnnotation(t.id, t)

    def __contains__(self, k):
        return k in self.sliceDB

    def __len__(self):
        return len(self.sliceDB)

    ########## ITERATORS
    def __iter__(self):
        return iter(self.sliceDB)

    def keys(self):
        return self.sliceDB.keys()

    def iteritems(self):
        'uses maxCache to manage caching of annotation objects'
        for k, sliceInfo in self.sliceDB.iteritems():
            yield k, self.sliceAnnotation(k, sliceInfo)

    def itervalues(self):
        'uses maxCache to manage caching of annotation objects'
        for k, v in self.iteritems():
            yield v

    def items(self):
        'forces load of all annotation objects into cache'
        return [(k, self.sliceAnnotation(k, sliceInfo, limitCache=False))
                for (k, sliceInfo) in self.sliceDB.items()]

    def values(self):
        'forces load of all annotation objects into cache'
        return [self.sliceAnnotation(k, sliceInfo, limitCache=False)
                for (k, sliceInfo) in self.sliceDB.items()]

    def add_homology(self, seq, search, id=None, idFormat='%s_%d',
                     autoIncrement=False, maxAnnot=999999,
                     maxLoss=None, sliceInfo=None, **kwargs):
        '''find homology in our seq db and add as annotations.
        search may be a callable or the name of a seqDB method; seq may be
        a sequence object or a string.  Each hit is stored as a tuple
        (id, start, stop) plus the optional sliceInfo tuple.  Returns the
        list of new annotation objects; raises ValueError if there are
        more than maxAnnot hits.'''
        try:  # ENSURE THAT sliceAttrDict COMPATIBLE WITH OUR TUPLE FORMAT
            if self.sliceAttrDict['id'] != 0:
                raise KeyError
        except KeyError:
            # USE TUPLE AS OUR INTERNAL STANDARD FORMAT
            self.sliceAttrDict['id'] = 0
            self.sliceAttrDict['start'] = 1
            self.sliceAttrDict['stop'] = 2
        if autoIncrement:
            id = len(self.sliceDB)
        elif id is None:
            id = seq.id
        if isinstance(search, str):  # GET SEARCH METHOD
            search = getattr(self.seqDB, search)
        if isinstance(seq, str):  # CREATE A SEQ OBJECT
            seq = Sequence(seq, str(id))
        al = search(seq, **kwargs)  # RUN THE HOMOLOGY SEARCH
        if maxLoss is not None:  # REQUIRE HIT BE AT LEAST A CERTAIN LENGTH
            kwargs['minAlignSize'] = len(seq) - maxLoss
        hits = al[seq].keys(**kwargs)  # OBTAIN LIST OF HIT INTERVALS
        if len(hits) > maxAnnot:
            raise ValueError('too many hits for %s: %d' % (id, len(hits)))
        out = []
        i = 0
        k = id
        for ival in hits:  # CREATE ANNOTATION FOR EACH HIT
            if len(hits) > 1:  # NEED TO CREATE AN ID FOR EACH HIT
                if autoIncrement:
                    k = len(self.sliceDB)
                else:
                    k = idFormat % (id, i)
                i += 1
            if sliceInfo is not None:  # SAVE SLICE AS TUPLE WITH INFO
                a = self.new_annotation(k, (ival.id, ival.start, ival.stop) + sliceInfo)
            else:
                a = self.new_annotation(k, (ival.id, ival.start, ival.stop))
            out.append(a)  # RETURN THE ANNOTATION
        return out

    def close(self):
        'if sliceDB needs to be closed, do it and return True, otherwise False'
        try:
            if self._wroteSliceDB:
                self.sliceDB.close()
                self._wroteSliceDB = False  # DISK FILE IS UP TO DATE
                return True
        except AttributeError:
            pass  # nothing was ever written through this object
        return False

    def __del__(self):
        'flush unsaved data via close() and warn the user if that was needed'
        if self.close():
            import sys
            sys.stderr.write('''
WARNING: you forgot to call AnnotationDB.close() after writing
new annotation data to it. This could result in failure to properly
store the data in the associated disk file. To avoid this, we
have automatically called AnnotationDB.sliceDB.close() to write the data
for you, when the AnnotationDB was deleted.''' + '\n')

    def clear_cache(self):
        'empty the cache'
        self._weakValueDict.clear()

    # not clear what this should do for AnnotationDB
    def copy(self):
        raise NotImplementedError("nonsensical in AnnotationDB")

    def setdefault(self, k, d=None):
        raise NotImplementedError("nonsensical in AnnotationDB")

    def update(self, other):
        raise NotImplementedError("nonsensical in AnnotationDB")

    # these methods should not be implemented for read-only database.
    def clear(self):
        raise NotImplementedError("no deletions allowed")

    def pop(self):
        raise NotImplementedError("no deletions allowed")

    def popitem(self):
        raise NotImplementedError("no deletions allowed")
class AnnotationServer(AnnotationDB):
    """XMLRPC-ready server for AnnotationDB.

    The methods listed in ``xmlrpc_methods`` signal failure by returning the
    empty string instead of raising, so the result can be sent over XMLRPC.
    """
    xmlrpc_methods = {'get_slice_tuple': 0, 'get_slice_items': 0,
                      'get_annotation_attr': 0, 'keys': 0,
                      '__len__': 0, '__contains__': 0}

    def get_slice_tuple(self, k):
        "get (seqID,start,stop) for a given key, or '' if k is unknown"
        try:
            sliceInfo = self.sliceDB[k]
        except KeyError:
            return ''  # XMLRPC-acceptable failure code
        start = int(self.getSliceAttr(sliceInfo, 'start'))
        stop = int(self.getSliceAttr(sliceInfo, 'stop'))
        try:
            if int(self.getSliceAttr(sliceInfo, 'orientation')) < 0 and start >= 0:
                start, stop = (-stop, -start)  # NEGATIVE ORIENTATION COORDINATES
        except (AttributeError, IndexError):
            pass  # ok if no orientation is specified (same rule as get_annot_obj)
        return (self.getSliceAttr(sliceInfo, 'id'), start, stop)

    def get_slice_items(self):
        'get all (key,tuple) pairs in one query'
        return [(k, self.get_slice_tuple(k)) for k in self.sliceDB]

    def get_annotation_attr(self, k, attr):
        "get the requested attribute of the requested key, or '' on failure"
        try:
            sliceInfo = self.sliceDB[k]
        except KeyError:
            return ''  # unknown key
        try:
            return self.getSliceAttr(sliceInfo, attr)
        except AttributeError:
            return ''  # record has no such attribute
367 class AnnotationClientSliceDB(dict):
368 'proxy just queries the server'
369 def __init__(self
, db
):
372 def __getitem__(self
, k
):
374 return dict.__getitem
__(self
, k
)
376 t
= self
.db
.server
.get_slice_tuple(k
)
378 raise KeyError('no such annotation: ' + str(k
))
379 dict.__setitem
__(self
, k
, t
)
381 def __setitem__(self
, k
, v
): raise ValueError('XMLRPC client is read-only')
382 def keys(self
): return self
.db
.server
.keys()
383 def __iter__(self
): return iter(self
.keys())
384 def items(self
): return self
.db
.server
.get_slice_items()
385 def iteritems(self
): return iter(self
.items())
386 def __len__(self
): return self
.db
.server
.__len
__()
387 def __contains__(self
, k
): return self
.db
.server
.__contains
__(k
)
class AnnotationClient(AnnotationDB):
    """XMLRPC AnnotationDB client.

    Slice records are (seqID, start, stop) tuples obtained from the server
    through an ``AnnotationClientSliceDB``; any other attribute is requested
    from the server one at a time.
    """

    def __init__(self, url, name, seqDB, itemClass=AnnotationSeq,
                 itemSliceClass=AnnotationSlice, autoGC=True, **kwargs):
        """Connect to the annotation resource `name` served at `url`.

        seqDB must map the sequence IDs returned by the server to sliceable
        sequence objects.  itemClass, itemSliceClass and autoGC are stored
        in the same way as in AnnotationDB.
        """
        if autoGC:  # automatically garbage collect unused objects
            self._weakValueDict = classutil.RecentValueDictionary(autoGC)
        else:
            self._weakValueDict = {}  # object cache
        self.autoGC = autoGC
        import coordinator  # NOTE(review): assumed to be a local import -- confirm
        self.server = coordinator.get_connection(url, name)
        # url, name and seqDB are kept because __getstate__ pickles them
        self.url = url
        self.name = name
        self.seqDB = seqDB
        self.sliceDB = AnnotationClientSliceDB(self)
        self.itemClass = itemClass
        self.itemSliceClass = itemSliceClass

    def __getstate__(self):
        'pickle only what is needed to reconnect'
        return dict(url=self.url, name=self.name, seqDB=self.seqDB,
                    autoGC=self.autoGC)

    def getSliceAttr(self, sliceInfo, attr):
        """Get attr from a (seqID, start, stop) slice tuple.

        'id', 'start' and 'stop' are read from the tuple; 'orientation'
        always raises AttributeError; anything else is requested from the
        server, whose '' reply is turned into AttributeError.
        """
        if attr == 'id':
            return sliceInfo[0]
        elif attr == 'start':
            return sliceInfo[1]
        elif attr == 'stop':
            return sliceInfo[2]
        elif attr == 'orientation':
            raise AttributeError('ori not saved')
        else:
            # NOTE(review): sliceInfo[0] is the value returned for 'id' above,
            # but the server looks it up as an annotation key -- confirm.
            v = self.server.get_annotation_attr(sliceInfo[0], attr)
            if v == '':  # server's failure code
                raise AttributeError('this annotation has no attr: ' + attr)
            return v